Total coverage: 229775 (13%) of 1883375
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H

#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>

/*
 * Internal slab definitions
 */

#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
#undef system_has_freelist_aba
#endif

/*
 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
 * problems with cmpxchg of just a pointer.
 */
struct freelist_counters {
	union {
		struct {
			void *freelist;
			union {
				unsigned long counters;
				struct {
					unsigned inuse:16;
					unsigned objects:15;
					/*
					 * If slab debugging is enabled then the
					 * frozen bit can be reused to indicate
					 * that the slab was corrupted
					 */
					unsigned frozen:1;
#ifdef CONFIG_64BIT
					/*
					 * Some optimizations use free bits in 'counters' field
					 * to save memory. In case ->stride field is not available,
					 * such optimizations are disabled.
					 */
					unsigned short stride;
#endif
				};
			};
		};
#ifdef system_has_freelist_aba
		freelist_full_t freelist_counters;
#endif
	};
};

/* Reuses the bits in struct page */
struct slab {
	memdesc_flags_t flags;

	struct kmem_cache *slab_cache;
	union {
		struct {
			union {
				struct list_head slab_list;
				struct {
					/* For deferred deactivate_slab() */
					struct llist_node llnode;
					void *flush_freelist;
				};
#ifdef CONFIG_SLUB_CPU_PARTIAL
				struct {
					struct slab *next;
					int slabs;	/* Nr of slabs left */
				};
#endif
			};
			/* Double-word boundary */
			struct freelist_counters;
		};
		struct rcu_head rcu_head;
	};

	unsigned int __page_type;
	atomic_t __page_refcount;
#ifdef CONFIG_SLAB_OBJ_EXT
	unsigned long obj_exts;
#endif
};

#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, flags);
SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, obj_exts);
#elif defined(CONFIG_SLAB_OBJ_EXT)
SLAB_MATCH(_unused_slab_obj_exts, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(struct freelist_counters)));
#endif

/**
 * slab_folio - The folio allocated for a slab
 * @s: The slab.
 *
 * Slabs are allocated as folios that contain the individual objects and are
 * using some fields in the first struct page of the folio - those fields are
 * now accessed by struct slab. It is occasionally necessary to convert back to
 * a folio in order to communicate with the rest of the mm. Please use this
 * helper function instead of casting yourself, as the implementation may change
 * in the future.
 */
#define slab_folio(s)		(_Generic((s),				\
	const struct slab *:	(const struct folio *)s,		\
	struct slab *:		(struct folio *)s))

/**
 * page_slab - Converts from struct page to its slab.
 * @page: A page which may or may not belong to a slab.
 *
 * Return: The slab which contains this page or NULL if the page does
 * not belong to a slab. This includes pages returned from large kmalloc.
 */
static inline struct slab *page_slab(const struct page *page)
{
	unsigned long head;

	head = READ_ONCE(page->compound_head);
	if (head & 1)
		page = (struct page *)(head - 1);
	if (data_race(page->page_type >> 24) != PGTY_slab)
		page = NULL;
	return (struct slab *)page;
}

/**
 * slab_page - The first struct page allocated for a slab
 * @s: The slab.
 *
 * A convenience wrapper for converting slab to the first struct page of the
 * underlying folio, to communicate with code not yet converted to folio or
 * struct slab.
 */
#define slab_page(s) folio_page(slab_folio(s), 0)

static inline void *slab_address(const struct slab *slab)
{
	return folio_address(slab_folio(slab));
}

static inline int slab_nid(const struct slab *slab)
{
	return memdesc_nid(slab->flags);
}

static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
	return NODE_DATA(slab_nid(slab));
}

static inline struct slab *virt_to_slab(const void *addr)
{
	return page_slab(virt_to_page(addr));
}

static inline int slab_order(const struct slab *slab)
{
	return folio_order(slab_folio(slab));
}

static inline size_t slab_size(const struct slab *slab)
{
	return PAGE_SIZE << slab_order(slab);
}

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)			((c)->partial)

#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)	READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL

#define slub_set_percpu_partial(c, p)

#define slub_percpu_partial_read_once(c)	NULL
#endif // CONFIG_SLUB_CPU_PARTIAL

/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned int x;
};

/*
 * Slab cache management.
 */
struct kmem_cache {
	struct kmem_cache_cpu __percpu *cpu_slab;
	struct lock_class_key lock_key;
	struct slub_percpu_sheaves __percpu *cpu_sheaves;
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
	unsigned long min_partial;
	unsigned int size;		/* Object size including metadata */
	unsigned int object_size;	/* Object size without metadata */
	struct reciprocal_value reciprocal_size;
	unsigned int offset;		/* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/* Number of per cpu partial objects to keep around */
	unsigned int cpu_partial;
	/* Number of per cpu partial slabs to keep around */
	unsigned int cpu_partial_slabs;
#endif
	unsigned int sheaf_capacity;
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
	struct kmem_cache_order_objects min;
	gfp_t allocflags;		/* gfp flags to use on each alloc */
	int refcount;			/* Refcount for slab cache destroy */
	void (*ctor)(void *object);	/* Object constructor */
	unsigned int inuse;		/* Offset to metadata */
	unsigned int align;		/* Alignment */
	unsigned int red_left_pad;	/* Left redzone padding size */
	const char *name;		/* Name (only for display!) */
	struct list_head list;		/* List of slab caches */
#ifdef CONFIG_SYSFS
	struct kobject kobj;		/* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	unsigned long random;
#endif

#ifdef CONFIG_NUMA
	/*
	 * Defragmentation by allocating from a remote node.
	 */
	unsigned int remote_node_defrag_ratio;
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
	unsigned int *random_seq;
#endif

#ifdef CONFIG_KASAN_GENERIC
	struct kasan_cache kasan_info;
#endif

#ifdef CONFIG_HARDENED_USERCOPY
	unsigned int useroffset;	/* Usercopy region offset */
	unsigned int usersize;		/* Usercopy region size */
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
};

#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS 1
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif

void *fixup_red_left(struct kmem_cache *s, void *p);

static inline void *nearest_obj(struct kmem_cache *cache,
				const struct slab *slab, void *x)
{
	void *object = x - (x - slab_address(slab)) % cache->size;
	void *last_object = slab_address(slab) +
		(slab->objects - 1) * cache->size;
	void *result = (unlikely(object > last_object)) ? last_object : object;

	result = fixup_red_left(cache, result);
	return result;
}

/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
					  void *addr, void *obj)
{
	return reciprocal_divide(kasan_reset_tag(obj) - addr,
				 cache->reciprocal_size);
}

static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	if (is_kfence_address(obj))
		return 0;
	return __obj_to_index(cache, slab_address(slab), obj);
}

static inline int objs_per_slab(const struct kmem_cache *cache,
				const struct slab *slab)
{
	return slab->objects;
}

/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(void);

extern u8 kmalloc_size_index[24];

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 *
 * This assumes size is larger than zero and not larger than
 * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
 */
static inline struct kmem_cache *
kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
	unsigned int index;

	if (!b)
		b = &kmalloc_caches[kmalloc_type(flags, caller)];
	if (size <= 192)
		index = kmalloc_size_index[size_index_elem(size)];
	else
		index = fls(size - 1);

	return (*b)[index];
}

gfp_t kmalloc_fix_flags(gfp_t flags);

/* Functions provided by the slab allocators */
int do_kmem_cache_create(struct kmem_cache *s, const char *name,
			 unsigned int size, struct kmem_cache_args *args,
			 slab_flags_t flags);

void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);

static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
	return (s->flags & SLAB_KMALLOC);
}

static inline bool is_kmalloc_normal(struct kmem_cache *s)
{
	if (!is_kmalloc_cache(s))
		return false;
	return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
}

bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
void flush_all_rcu_sheaves(void);
void flush_rcu_sheaves_on_cache(struct kmem_cache *s);

#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
			 SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			 SLAB_TEMPORARY | SLAB_ACCOUNT | \
			 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)

#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)

#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS)

bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);

struct seq_file;
struct file;

struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
long validate_slab_cache(struct kmem_cache *s);
static inline bool __slub_debug_enabled(void)
{
	return static_branch_unlikely(&slub_debug_enabled);
}
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
static inline bool __slub_debug_enabled(void)
{
	return false;
}
#endif

/*
 * Returns true if any of the specified slab_debug flags is enabled for the
 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
 * the static key.
 */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	if (IS_ENABLED(CONFIG_SLUB_DEBUG))
		VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	if (__slub_debug_enabled())
		return s->flags & flags;
	return false;
}

#if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT)
bool slab_in_kunit_test(void);
#else
static inline bool slab_in_kunit_test(void) { return false; }
#endif

/*
 * slub is about to manipulate internal object metadata. This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error. metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
static inline void metadata_access_enable(void)
{
	kasan_disable_current();
	kmsan_disable_current();
}

static inline void metadata_access_disable(void)
{
	kmsan_enable_current();
	kasan_enable_current();
}

#ifdef CONFIG_SLAB_OBJ_EXT

/*
 * slab_obj_exts - get the pointer to the slab object extension vector
 * associated with a slab.
 * @slab: a pointer to the slab struct
 *
 * Returns the address of the object extension vector associated with the slab,
 * or zero if no such vector has been associated yet.
 * Do not dereference the return value directly; use get/put_slab_obj_exts()
 * pair and slab_obj_ext() to access individual elements.
 *
 * Example usage:
 *
 *	obj_exts = slab_obj_exts(slab);
 *	if (obj_exts) {
 *		get_slab_obj_exts(obj_exts);
 *		obj_ext = slab_obj_ext(slab, obj_exts, obj_to_index(s, slab, obj));
 *		// do something with obj_ext
 *		put_slab_obj_exts(obj_exts);
 *	}
 *
 * Note that the get/put semantics does not involve reference counting.
 * Instead, it updates kasan/kmsan depth so that accesses to slabobj_ext
 * won't be reported as access violations.
 */
static inline unsigned long slab_obj_exts(struct slab *slab)
{
	unsigned long obj_exts = READ_ONCE(slab->obj_exts);

#ifdef CONFIG_MEMCG
	/*
	 * obj_exts should be either NULL, a valid pointer with
	 * MEMCG_DATA_OBJEXTS bit set or be equal to OBJEXTS_ALLOC_FAIL.
	 */
	VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS) &&
		       obj_exts != OBJEXTS_ALLOC_FAIL, slab_page(slab));
	VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
#endif
	return obj_exts & ~OBJEXTS_FLAGS_MASK;
}

static inline void get_slab_obj_exts(unsigned long obj_exts)
{
	VM_WARN_ON_ONCE(!obj_exts);
	metadata_access_enable();
}

static inline void put_slab_obj_exts(unsigned long obj_exts)
{
	metadata_access_disable();
}

#ifdef CONFIG_64BIT
static inline void slab_set_stride(struct slab *slab, unsigned short stride)
{
	slab->stride = stride;
}
static inline unsigned short slab_get_stride(struct slab *slab)
{
	return slab->stride;
}
#else
static inline void slab_set_stride(struct slab *slab, unsigned short stride)
{
	VM_WARN_ON_ONCE(stride != sizeof(struct slabobj_ext));
}
static inline unsigned short slab_get_stride(struct slab *slab)
{
	return sizeof(struct slabobj_ext);
}
#endif

/*
 * slab_obj_ext - get the pointer to the slab object extension metadata
 * associated with an object in a slab.
 * @slab: a pointer to the slab struct
 * @obj_exts: a pointer to the object extension vector
 * @index: an index of the object
 *
 * Returns a pointer to the object extension associated with the object.
 * Must be called within a section covered by get/put_slab_obj_exts().
 */
static inline struct slabobj_ext *slab_obj_ext(struct slab *slab,
					       unsigned long obj_exts,
					       unsigned int index)
{
	struct slabobj_ext *obj_ext;

	VM_WARN_ON_ONCE(obj_exts != slab_obj_exts(slab));

	obj_ext = (struct slabobj_ext *)(obj_exts + slab_get_stride(slab) * index);
	return kasan_reset_tag(obj_ext);
}

int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
			gfp_t gfp, bool new_slab);

#else /* CONFIG_SLAB_OBJ_EXT */

static inline unsigned long slab_obj_exts(struct slab *slab)
{
	return 0;
}

static inline struct slabobj_ext *slab_obj_ext(struct slab *slab,
					       unsigned long obj_exts,
					       unsigned int index)
{
	return NULL;
}

static inline void slab_set_stride(struct slab *slab, unsigned int stride) { }
static inline unsigned int slab_get_stride(struct slab *slab) { return 0; }

#endif /* CONFIG_SLAB_OBJ_EXT */

static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}

#ifdef CONFIG_MEMCG
bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t flags, size_t size, void **p);
void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
			    void **p, int objects, unsigned long obj_exts);
#endif

void kvfree_rcu_cb(struct rcu_head *head);

static inline unsigned int large_kmalloc_order(const struct page *page)
{
	return page[1].flags.f & 0xff;
}

static inline size_t large_kmalloc_size(const struct page *page)
{
	return PAGE_SIZE << large_kmalloc_order(page);
}

#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc)) {
		if (c->ctor)
			return false;
		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
			return flags & __GFP_ZERO;
		return true;
	}
	return flags & __GFP_ZERO;
}

static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				&init_on_free))
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
void debugfs_slab_release(struct kmem_cache *);
#else
static inline void debugfs_slab_release(struct kmem_cache *s) { }
#endif

#ifdef CONFIG_PRINTK
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
	void *kp_ptr;
	struct slab *kp_slab;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
	void *kp_free_stack[KS_ADDRS_COUNT];
};
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif

void __check_heap_object(const void *ptr, unsigned long n,
			 const struct slab *slab, bool to_user);

void defer_free_barrier(void);

static inline bool slub_debug_orig_size(struct kmem_cache *s)
{
	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
			(s->flags & SLAB_KMALLOC));
}

#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif

#endif /* MM_SLAB_H */
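Aside for readers of this report: the bucket-index rule in kmalloc_slab() above (table lookup for sizes up to 192 bytes, fls(size - 1) beyond that) is compact enough to model outside the kernel. The sketch below is illustrative demo code, not part of the sources in this report; the size_index table contents are a hand-copied assumption about what setup_kmalloc_cache_index_table() builds at boot, and fls_u() is a local stand-in for the kernel's fls().

/* Hypothetical user-space model of the kmalloc bucket selection above. */
#include <stdio.h>

/* Assumed copy of kmalloc_size_index: element i covers sizes 8*i+1..8*(i+1);
 * values are cache indices for the 8/16/32/64/96/128/192-byte buckets. */
static const unsigned char size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,		/* sizes 1..64 */
	1, 1, 1, 1,			/* sizes 65..96  -> kmalloc-96 */
	7, 7, 7, 7,			/* sizes 97..128 -> kmalloc-128 */
	2, 2, 2, 2, 2, 2, 2, 2,		/* sizes 129..192 -> kmalloc-192 */
};

/* Position of the most significant set bit, 1-based (fls semantics). */
static unsigned int fls_u(unsigned int x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static unsigned int kmalloc_index(unsigned int size)
{
	if (size <= 192)
		return size_index[(size - 1) / 8];	/* size_index_elem() */
	return fls_u(size - 1);		/* next power-of-two bucket */
}

int main(void)
{
	unsigned int sizes[] = { 8, 96, 192, 193, 300, 4096 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("kmalloc(%u) -> bucket index %u\n",
		       sizes[i], kmalloc_index(sizes[i]));
	return 0;
}

Running it shows, for example, that a 193-byte request lands in index 8 (the 256-byte bucket), since fls(192) = 8.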
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 // SPDX-License-Identifier: GPL-2.0-or-later /* * PF_INET6 socket protocol family * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Adapted from linux/net/ipv4/af_inet.c * * Fixes: * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support * Arnaldo Melo : check proc_net_create return, cleanups */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> #include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include 
<net/ipv6_stubs.h> #include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> #include <net/ioam6.h> #include <net/rawv6.h> #include <net/rps.h> #include <linux/uaccess.h> #include <linux/mroute6.h> #include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); struct ipv6_params ipv6_defaults = { .disable_ipv6 = 0, .autoconf = 1, }; static int disable_ipv6_mod; module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444); MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); bool ipv6_mod_enabled(void) { return disable_ipv6_mod == 0; } EXPORT_SYMBOL_GPL(ipv6_mod_enabled); static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->ipv6_pinfo_offset; return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } void inet6_sock_destruct(struct sock *sk) { inet6_cleanup_sock(sk); inet_sock_destruct(sk); } EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; int try_loading_module = 0; int err; if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; } if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* * Be more specific, e.g. net-pf-10-proto-132-type-1 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM) */ if (++try_loading_module == 1) request_module("net-pf-%d-proto-%d-type-%d", PF_INET6, protocol, sock->type); /* * Fall back to generic, e.g. 
net-pf-10-proto-132 * (net-pf-PF_INET6-proto-IPPROTO_SCTP) */ else request_module("net-pf-%d-proto-%d", PF_INET6, protocol); goto lookup_protocol; } else goto out_rcu_unlock; } err = -EPERM; if (sock->type == SOCK_RAW && !kern && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; answer_flags = answer->flags; rcu_read_unlock(); WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; sock_init_data(sock, sk); err = 0; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; if (INET_PROTOSW_ICSK & answer_flags) inet_init_csk_locks(sk); inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet_set_bit(HDRINCL, sk); } sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; sk->sk_backlog_rcv = answer->prot->backlog_rcv; inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; inet6_set_bit(MC6_LOOP, sk); inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. */ inet->uc_ttl = -1; inet_set_bit(MC_LOOP, sk); inet->mc_ttl = 1; inet->mc_index = 0; RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); if (err) goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); if (err) goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; out_sk_release: sk_common_release(sk); sock->sk = NULL; goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; bool saved_ipv6only; int addr_type = 0; int err = 0; if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM) return -EINVAL; snum = ntohs(addr->sin6_port); if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) && snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket * makes no sense */ if (ipv6_only_sock(sk)) { err = -EINVAL; goto out; } rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); rcu_read_unlock(); if (!inet_addr_valid_or_nonlocal(net, inet, v4addr, chk_addr_ret)) { err = -EADDRNOTAVAIL; goto out; } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; rcu_read_lock(); if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one * is supplied by user. */ sk->sk_bound_dev_if = addr->sin6_scope_id; } /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; goto out_unlock; } } if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* ipv4 addr of the socket is invalid. Only the * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_can_nonlocal_bind(net, inet) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; goto out_unlock; } } rcu_read_unlock(); } } inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; saved_ipv6only = sk->sk_ipv6only; if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) sk->sk_ipv6only = 1; /* Make sure we are allowed to bind here. */ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { err = sk->sk_prot->get_port(sk, snum); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); goto out; } if (!(flags & BIND_FROM_BPF)) { err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); if (sk->sk_prot->put_port) sk->sk_prot->put_port(sk); goto out; } } } if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); inet->inet_dport = 0; inet->inet_daddr = 0; out: if (flags & BIND_WITH_LOCK) release_sock(sk); return err; out_unlock: rcu_read_unlock(); goto out; } int inet6_bind_sk(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; const struct proto *prot; int err = 0; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); /* If the socket has its own bind function then use it. */ if (prot->bind) return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. 
*/ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; return __inet6_bind(sk, uaddr, addr_len, flags); } /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { return inet6_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return -EINVAL; /* Free mc lists */ ipv6_sock_mc_close(sk); /* Free ac lists */ ipv6_sock_ac_close(sk); return inet_release(sock); } EXPORT_SYMBOL(inet6_release); void inet6_cleanup_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt; /* Release rx options */ skb = xchg(&np->pktoptions, NULL); kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); kfree_skb(skb); /* Free flowlabels */ fl6_free_socklist(sk); /* Free tx options */ opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } } EXPORT_SYMBOL_GPL(inet6_cleanup_sock); /* * This does both peername and sockname. */ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; lock_sock(sk); if (peer) { if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) { release_sock(sk); return -ENOTCONN; } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); const struct proto *prot; switch (cmd) { case SIOCADDRT: case SIOCDELRT: { struct in6_rtmsg rtmsg; if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) return -EFAULT; return ipv6_route_ioctl(net, cmd, &rtmsg); } case SIOCSIFADDR: return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); if (!prot->ioctl) return -ENOIOCTLCMD; return sk_ioctl(sk, cmd, (void __user *)arg); } /*NOTREACHED*/ return 0; } EXPORT_SYMBOL(inet6_ioctl); #ifdef CONFIG_COMPAT struct compat_in6_rtmsg { struct in6_addr rtmsg_dst; struct in6_addr rtmsg_src; struct in6_addr rtmsg_gateway; u32 rtmsg_type; u16 rtmsg_dst_len; u16 rtmsg_src_len; u32 rtmsg_metric; u32 rtmsg_info; u32 rtmsg_flags; s32 rtmsg_ifindex; }; static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, struct compat_in6_rtmsg __user *ur) { struct in6_rtmsg rt; if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, 3 * sizeof(struct in6_addr)) || get_user(rt.rtmsg_type, &ur->rtmsg_type) || get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || get_user(rt.rtmsg_info, &ur->rtmsg_info) || get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) return -EFAULT; return ipv6_route_ioctl(sock_net(sk), cmd, &rt); } int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; switch (cmd) { case SIOCADDRT: case SIOCDELRT: return inet6_compat_routing_ioctl(sk, cmd, argp); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(inet6_compat_ioctl); #endif /* CONFIG_COMPAT */ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_stream_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, .sendmsg_locked = tcp_sendmsg_locked, .splice_read = tcp_splice_read, .set_peek_off = sk_set_peek_off, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL_GPL(inet6_stream_ops); const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_dgram_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, }; int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; struct list_head *last_perm; int protocol = p->protocol; int ret; spin_lock_bh(&inetsw6_lock); ret = -EINVAL; if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ answer = NULL; ret = -EPERM; last_perm = &inetsw6[p->type]; list_for_each(lh, &inetsw6[p->type]) { answer = list_entry(lh, struct inet_protosw, list); /* Check only the non-wild match. */ if (INET_PROTOSW_PERMANENT & answer->flags) { if (protocol == answer->protocol) break; last_perm = lh; } answer = NULL; } if (answer) goto out_permanent; /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with * a wild-card protocol. But it is allowed to override any existing * non-permanent entry. This means that when we remove this entry, the * system automatically returns to the old behavior. 
*/ list_add_rcu(&p->list, last_perm); ret = 0; out: spin_unlock_bh(&inetsw6_lock); return ret; out_permanent: pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } EXPORT_SYMBOL(inet6_register_protosw); void inet6_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw6_lock); list_del_rcu(&p->list); spin_unlock_bh(&inetsw6_lock); synchronize_net(); } } EXPORT_SYMBOL(inet6_unregister_protosw); int inet6_sk_rebuild_header(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct dst_entry *dst; dst = __sk_dst_check(sk, np->dst_cookie); if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } ip6_dst_store(sk, dst, false, false); } return 0; } EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt) { const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { if (((opt->flags & IP6SKB_HOPBYHOP) && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return true; } return false; } static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .list_func = ipv6_list_rcv, }; static int __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); return 0; } static void ipv6_packet_cleanup(void) { dev_remove_pack(&ipv6_packet_type); } static int __net_init ipv6_init_mibs(struct net *net) { int i; net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udp_stats_in6) return -ENOMEM; net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udplite_stats_in6) goto err_udplite_mib; net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); } net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; return 0; err_icmpmsg_mib: free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: free_percpu(net->mib.ipv6_statistics); err_ip_mib: free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void 
ipv6_cleanup_mibs(struct net *net) { free_percpu(net->mib.udp_stats_in6); free_percpu(net->mib.udplite_stats_in6); free_percpu(net->mib.ipv6_statistics); free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0; net->ipv6.sysctl.icmpv6_errors_extension_mask = 0; /* By default, rate limit error messages. * Except for pmtu discovery, it would break it. * proc_do_large_bitmap needs pointer to the bitmap. */ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1); bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1); net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT; net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; err = ipv6_init_mibs(net); if (err) return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) goto out; err = tcp6_proc_init(net); if (err) goto proc_tcp6_fail; err = ac6_proc_init(net); if (err) goto proc_ac6_fail; #endif return err; #ifdef CONFIG_PROC_FS proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); out: ipv6_cleanup_mibs(net); return err; #endif } static void __net_exit inet6_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS udp6_proc_exit(net); tcp6_proc_exit(net); ac6_proc_exit(net); #endif ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { .init = inet6_net_init, .exit = inet6_net_exit, }; static int ipv6_route_input(struct sk_buff *skb) { ip6_route_input(skb); return skb_dst(skb)->error; } static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup_flow = ip6_dst_lookup_flow, .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, .fib6_select_path = fib6_select_path, .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, .udp6_lib_lookup = 
__udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) { struct list_head *r; int err = 0; sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); raw_hashinfo_init(&raw_v6_hashinfo); if (disable_ipv6_mod) { pr_info("Loaded, but administratively disabled, reboot required to enable\n"); goto out; } err = proto_register(&tcpv6_prot, 1); if (err) goto out; err = proto_register(&udpv6_prot, 1); if (err) goto out_unregister_tcp_proto; err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; err = proto_register(&rawv6_prot, 1); if (err) goto out_unregister_udplite_proto; err = proto_register(&pingv6_prot, 1); if (err) goto out_unregister_raw_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ err = rawv6_init(); if (err) goto out_unregister_ping_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) */ err = sock_register(&inet6_family_ops); if (err) goto out_sock_register_fail; /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance * in a host available by both INET and INET6 APIs and * able to communicate via both network protocols. */ err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; err = icmpv6_init(); if (err) goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); if (err) goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; if (udplite6_proc_init()) goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (if6_proc_init()) goto proc_if6_fail; #endif err = ip6_route_init(); if (err) goto ip6_route_fail; err = ndisc_late_init(); if (err) goto ndisc_late_fail; err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; err = ipv6_anycast_init(); if (err) goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; /* Init v6 extension headers. */ err = ipv6_exthdrs_init(); if (err) goto ipv6_exthdrs_fail; err = ipv6_frag_init(); if (err) goto ipv6_frag_fail; /* Init v6 transport protocols. 
*/ err = udpv6_init(); if (err) goto udpv6_fail; err = udplitev6_init(); if (err) goto udplitev6_fail; err = udpv6_offload_init(); if (err) goto udpv6_offload_fail; err = tcpv6_init(); if (err) goto tcpv6_fail; err = ipv6_packet_init(); if (err) goto ipv6_packet_fail; err = pingv6_init(); if (err) goto pingv6_fail; err = calipso_init(); if (err) goto calipso_fail; err = seg6_init(); if (err) goto seg6_fail; err = rpl_init(); if (err) goto rpl_fail; err = ioam6_init(); if (err) goto ioam6_fail; err = igmp6_late_init(); if (err) goto igmp6_late_err; #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) goto sysctl_fail; #endif /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; #ifdef CONFIG_SYSCTL sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: ioam6_exit(); ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: ipv6_packet_cleanup(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: udpv6_offload_exit(); udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); udpv6_fail: ipv6_frag_exit(); ipv6_frag_fail: ipv6_exthdrs_exit(); ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: ipv6_anycast_cleanup(); ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); ndisc_late_fail: ip6_route_cleanup(); ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: udplite6_proc_exit(); proc_udplite6_fail: raw6_proc_exit(); proc_raw6_fail: #endif ipv6_netfilter_fini(); netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); ndisc_fail: icmpv6_cleanup(); icmp_fail: ip6_mr_cleanup(); ipmr_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); out_unregister_ping_proto: proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); goto out; } module_init(inet6_init); MODULE_ALIAS_NETPROTO(PF_INET6);
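/*
 * A minimal, self-contained sketch (plain user-space C, not kernel code) of
 * the staged-init/unwind idiom that inet6_init() above relies on: every
 * successful step gains a matching cleanup label, and a failure at step N
 * falls through the label ladder, undoing steps N-1..1 in reverse order
 * before returning the first error. All names below (step_a/step_b/step_c,
 * undo_*) are hypothetical stand-ins for the proto_register()/
 * sock_register()/register_pernet_subsys() style calls in the real function.
 */
#include <stdio.h>

static int step_a(void) { puts("a up");   return 0; }
static int step_b(void) { puts("b up");   return 0; }
static int step_c(void) { puts("c up");   return -1; } /* simulate failure */
static void undo_a(void) { puts("a down"); }
static void undo_b(void) { puts("b down"); }

static int staged_init(void)
{
	int err;

	err = step_a();
	if (err)
		goto out;
	err = step_b();
	if (err)
		goto fail_a;
	err = step_c();
	if (err)
		goto fail_b;
	return 0;	/* everything is up */

fail_b:
	undo_b();
fail_a:
	undo_a();
out:
	return err;	/* caller sees the first error; nothing leaks */
}

int main(void)
{
	return staged_init() ? 1 : 0;
}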
// SPDX-License-Identifier: GPL-2.0-or-later /* * Fujifilm Finepix subdriver * * Copyright (C) 2008 Frank Zago */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "finepix" #include "gspca.h" MODULE_AUTHOR("Frank Zago <frank@zago.net>"); MODULE_DESCRIPTION("Fujifilm FinePix USB V4L2 driver"); MODULE_LICENSE("GPL"); /* Default timeout, in ms */ #define FPIX_TIMEOUT 250 /* Maximum transfer size to use. The Windows driver reads by chunks of * 0x2000 bytes, so do the same. Note: reading more seems to work * too. */ #define FPIX_MAX_TRANSFER 0x2000 /* Structure to hold all of our device specific stuff */ struct usb_fpix { struct gspca_dev gspca_dev; /* !! must be the first item */ struct work_struct work_struct; }; /* Delay before claiming the next frame. If the delay is too small, * the camera will return old frames. On the 4800Z, 20ms is bad, 25ms * will fail every 4 or 5 frames, but 30ms is perfect. On the A210, * 30ms is bad while 35ms is perfect. */ #define NEXT_FRAME_DELAY 35 /* These cameras only support 320x240. */ static const struct v4l2_pix_format fpix_mode[1] = { { 320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0} }; /* send a command to the webcam */ static int command(struct gspca_dev *gspca_dev, int order) /* 0: reset, 1: frame request */ { static u8 order_values[2][12] = { {0xc6, 0, 0, 0, 0, 0, 0, 0, 0x20, 0, 0, 0}, /* reset */ {0xd3, 0, 0, 0, 0, 0, 0, 0x01, 0, 0, 0, 0}, /* fr req */ }; memcpy(gspca_dev->usb_buf, order_values[order], 12); return usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_STATUS, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 0, gspca_dev->usb_buf, 12, FPIX_TIMEOUT); } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the camera doesn't provide any controls.
*/ static void dostream(struct work_struct *work) { struct usb_fpix *dev = container_of(work, struct usb_fpix, work_struct); struct gspca_dev *gspca_dev = &dev->gspca_dev; struct urb *urb = gspca_dev->urb[0]; u8 *data = urb->transfer_buffer; int ret = 0; int len; gspca_dbg(gspca_dev, D_STREAM, "dostream started\n"); /* loop reading a frame */ again: while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif /* request a frame */ mutex_lock(&gspca_dev->usb_lock); ret = command(gspca_dev, 1); mutex_unlock(&gspca_dev->usb_lock); if (ret < 0) break; #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif if (!gspca_dev->present || !gspca_dev->streaming) break; /* the frame comes in parts */ for (;;) { ret = usb_bulk_msg(gspca_dev->dev, urb->pipe, data, FPIX_MAX_TRANSFER, &len, FPIX_TIMEOUT); if (ret < 0) { /* Most of the time we get a timeout * error. Just restart. */ goto again; } #ifdef CONFIG_PM if (gspca_dev->frozen) goto out; #endif if (!gspca_dev->present || !gspca_dev->streaming) goto out; if (len < FPIX_MAX_TRANSFER || (data[len - 2] == 0xff && data[len - 1] == 0xd9)) { /* If the result is less than what was asked * for, then it's the end of the * frame. Sometimes the jpeg is not complete, * but there's nothing we can do. We also end * here if the jpeg ends right at the end * of the frame. */ gspca_frame_add(gspca_dev, LAST_PACKET, data, len); break; } /* got a partial image */ gspca_frame_add(gspca_dev, gspca_dev->last_packet_type == LAST_PACKET ? FIRST_PACKET : INTER_PACKET, data, len); } /* We must wait before trying reading the next * frame. If we don't, or if the delay is too short, * the camera will disconnect. */ msleep(NEXT_FRAME_DELAY); } out: gspca_dbg(gspca_dev, D_STREAM, "dostream stopped\n"); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct usb_fpix *dev = (struct usb_fpix *) gspca_dev; struct cam *cam = &gspca_dev->cam; cam->cam_mode = fpix_mode; cam->nmodes = 1; cam->bulk = 1; cam->bulk_size = FPIX_MAX_TRANSFER; INIT_WORK(&dev->work_struct, dostream); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { return 0; } /* start the camera */ static int sd_start(struct gspca_dev *gspca_dev) { struct usb_fpix *dev = (struct usb_fpix *) gspca_dev; int ret, len; /* Init the device */ ret = command(gspca_dev, 0); if (ret < 0) { pr_err("init failed %d\n", ret); return ret; } /* Read the result of the command. Ignore the result, for it * varies with the device. 
*/ ret = usb_bulk_msg(gspca_dev->dev, gspca_dev->urb[0]->pipe, gspca_dev->urb[0]->transfer_buffer, FPIX_MAX_TRANSFER, &len, FPIX_TIMEOUT); if (ret < 0) { pr_err("usb_bulk_msg failed %d\n", ret); return ret; } /* Request a frame, but don't read it */ ret = command(gspca_dev, 1); if (ret < 0) { pr_err("frame request failed %d\n", ret); return ret; } /* Again, reset bulk in endpoint */ usb_clear_halt(gspca_dev->dev, gspca_dev->urb[0]->pipe); schedule_work(&dev->work_struct); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct usb_fpix *dev = (struct usb_fpix *) gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); flush_work(&dev->work_struct); mutex_lock(&gspca_dev->usb_lock); } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x04cb, 0x0104)}, {USB_DEVICE(0x04cb, 0x0109)}, {USB_DEVICE(0x04cb, 0x010b)}, {USB_DEVICE(0x04cb, 0x010f)}, {USB_DEVICE(0x04cb, 0x0111)}, {USB_DEVICE(0x04cb, 0x0113)}, {USB_DEVICE(0x04cb, 0x0115)}, {USB_DEVICE(0x04cb, 0x0117)}, {USB_DEVICE(0x04cb, 0x0119)}, {USB_DEVICE(0x04cb, 0x011b)}, {USB_DEVICE(0x04cb, 0x011d)}, {USB_DEVICE(0x04cb, 0x0121)}, {USB_DEVICE(0x04cb, 0x0123)}, {USB_DEVICE(0x04cb, 0x0125)}, {USB_DEVICE(0x04cb, 0x0127)}, {USB_DEVICE(0x04cb, 0x0129)}, {USB_DEVICE(0x04cb, 0x012b)}, {USB_DEVICE(0x04cb, 0x012d)}, {USB_DEVICE(0x04cb, 0x012f)}, {USB_DEVICE(0x04cb, 0x0131)}, {USB_DEVICE(0x04cb, 0x013b)}, {USB_DEVICE(0x04cb, 0x013d)}, {USB_DEVICE(0x04cb, 0x013f)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct usb_fpix), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
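/*
 * A small sketch (standalone C, not driver code) of the end-of-frame test
 * that dostream() above applies to each bulk transfer: a chunk is the last
 * one of a frame either when it is shorter than the requested
 * FPIX_MAX_TRANSFER, or when it ends exactly on the JPEG end-of-image
 * marker 0xFF 0xD9. The helper name and CHUNK_MAX constant are hypothetical
 * stand-ins for the driver's internals.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

#define CHUNK_MAX 0x2000	/* mirrors FPIX_MAX_TRANSFER */

static bool chunk_ends_frame(const unsigned char *data, size_t len)
{
	if (len < CHUNK_MAX)
		return true;	/* short read: camera finished the frame */
	return len >= 2 &&
	       data[len - 2] == 0xff &&
	       data[len - 1] == 0xd9;	/* JPEG EOI right at the chunk edge */
}

int main(void)
{
	unsigned char tail[] = { 0x12, 0x34, 0xff, 0xd9 };

	/* 4 bytes < CHUNK_MAX, so this is reported as a final chunk */
	printf("%d\n", chunk_ends_frame(tail, sizeof(tail)));
	return 0;
}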
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/fs.h> #include <linux/module.h> #include <linux/completion.h> #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/moduleparam.h> #include <linux/kthread.h> #include <linux/posix_acl.h> #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/crc32.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_inode.h" #include "jfs_metapage.h" #include "jfs_superblock.h" #include "jfs_dmap.h" #include "jfs_imap.h" #include "jfs_acl.h" #include "jfs_debug.h" #include "jfs_xattr.h" #include "jfs_dinode.h" MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); static struct kmem_cache *jfs_inode_cachep; static const struct super_operations jfs_super_operations; static const struct export_operations jfs_export_operations; static struct file_system_type jfs_fs_type; #define MAX_COMMIT_THREADS 64 static int commit_threads; module_param(commit_threads, int, 0); MODULE_PARM_DESC(commit_threads, "Number of commit threads"); static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS]; struct task_struct *jfsIOthread; struct task_struct *jfsSyncThread; #ifdef CONFIG_JFS_DEBUG int jfsloglevel = JFS_LOGLEVEL_WARN; module_param(jfsloglevel, int, 0644); MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); #endif static void jfs_handle_error(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); if (sb_rdonly(sb)) return; updateSuper(sb, FM_DIRTY); if (sbi->flag & JFS_ERR_PANIC) panic("JFS (device %s): panic forced after error\n", sb->s_id); else if (sbi->flag & JFS_ERR_REMOUNT_RO) { jfs_err("ERROR: (device %s): remounting filesystem as read-only", sb->s_id); sb->s_flags |= SB_RDONLY; } /* nothing is done for continue beyond marking the superblock dirty */ } void jfs_error(struct super_block *sb, const char *fmt, ...)
{ struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("ERROR: (device %s): %ps: %pV\n", sb->s_id, __builtin_return_address(0), &vaf); va_end(args); jfs_handle_error(sb); } static struct inode *jfs_alloc_inode(struct super_block *sb) { struct jfs_inode_info *jfs_inode; jfs_inode = alloc_inode_sb(sb, jfs_inode_cachep, GFP_NOFS); if (!jfs_inode) return NULL; #ifdef CONFIG_QUOTA memset(&jfs_inode->i_dquot, 0, sizeof(jfs_inode->i_dquot)); #endif return &jfs_inode->vfs_inode; } static void jfs_free_inode(struct inode *inode) { kmem_cache_free(jfs_inode_cachep, JFS_IP(inode)); } static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); s64 maxinodes; struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; jfs_info("In jfs_statfs"); buf->f_type = JFS_SUPER_MAGIC; buf->f_bsize = sbi->bsize; buf->f_blocks = sbi->bmap->db_mapsize; buf->f_bfree = sbi->bmap->db_nfree; buf->f_bavail = sbi->bmap->db_nfree; /* * If we really return the number of allocated & free inodes, some * applications will fail because they won't see enough free inodes. * We'll try to calculate some guess as to how many inodes we can * really allocate * * buf->f_files = atomic_read(&imap->im_numinos); * buf->f_ffree = atomic_read(&imap->im_numfree); */ maxinodes = min((s64) atomic_read(&imap->im_numinos) + ((sbi->bmap->db_nfree >> imap->im_l2nbperiext) << L2INOSPEREXT), (s64) 0xffffffffLL); buf->f_files = maxinodes; buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - atomic_read(&imap->im_numfree)); buf->f_fsid.val[0] = crc32_le(0, (char *)&sbi->uuid, sizeof(sbi->uuid)/2); buf->f_fsid.val[1] = crc32_le(0, (char *)&sbi->uuid + sizeof(sbi->uuid)/2, sizeof(sbi->uuid)/2); buf->f_namelen = JFS_NAME_MAX; return 0; } #ifdef CONFIG_QUOTA static int jfs_quota_off(struct super_block *sb, int type); static int jfs_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); static void jfs_quota_off_umount(struct super_block *sb) { int type; for (type = 0; type < MAXQUOTAS; type++) jfs_quota_off(sb, type); } static const struct quotactl_ops jfs_quotactl_ops = { .quota_on = jfs_quota_on, .quota_off = jfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk, .get_nextdqblk = dquot_get_next_dqblk, }; #else static inline void jfs_quota_off_umount(struct super_block *sb) { } #endif static void jfs_put_super(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); int rc; jfs_info("In jfs_put_super"); jfs_quota_off_umount(sb); rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); unload_nls(sbi->nls_tab); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); iput(sbi->direct_inode); kfree(sbi); } enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, Opt_discard, Opt_nodiscard, Opt_discard_minblk }; static const struct constant_table jfs_param_errors[] = { {"continue", JFS_ERR_CONTINUE}, {"remount-ro", JFS_ERR_REMOUNT_RO}, {"panic", JFS_ERR_PANIC}, {} }; static const struct fs_parameter_spec jfs_param_spec[] = { fsparam_flag_no ("integrity", Opt_integrity), fsparam_string ("iocharset", Opt_iocharset), fsparam_u64 ("resize", Opt_resize), fsparam_flag ("resize", Opt_resize_nosize), fsparam_enum ("errors", Opt_errors, jfs_param_errors), fsparam_flag 
("quota", Opt_quota), fsparam_flag ("noquota", Opt_ignore), fsparam_flag ("usrquota", Opt_usrquota), fsparam_flag ("grpquota", Opt_grpquota), fsparam_uid ("uid", Opt_uid), fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("umask", Opt_umask), fsparam_flag ("discard", Opt_discard), fsparam_u32 ("discard", Opt_discard_minblk), fsparam_flag ("nodiscard", Opt_nodiscard), {} }; struct jfs_context { int flag; kuid_t uid; kgid_t gid; uint umask; uint minblks_trim; void *nls_map; bool resize; s64 newLVSize; }; static int jfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct jfs_context *ctx = fc->fs_private; int reconfigure = (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE); struct fs_parse_result result; struct nls_table *nls_map; int opt; opt = fs_parse(fc, jfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_integrity: if (result.negated) ctx->flag |= JFS_NOINTEGRITY; else ctx->flag &= ~JFS_NOINTEGRITY; break; case Opt_ignore: /* Silently ignore the quota options */ /* Don't do anything ;-) */ break; case Opt_iocharset: if (ctx->nls_map && ctx->nls_map != (void *) -1) { unload_nls(ctx->nls_map); ctx->nls_map = NULL; } if (!strcmp(param->string, "none")) ctx->nls_map = NULL; else { nls_map = load_nls(param->string); if (!nls_map) { pr_err("JFS: charset not found\n"); return -EINVAL; } ctx->nls_map = nls_map; } break; case Opt_resize: if (!reconfigure) return -EINVAL; ctx->resize = true; ctx->newLVSize = result.uint_64; break; case Opt_resize_nosize: if (!reconfigure) return -EINVAL; ctx->resize = true; break; case Opt_errors: ctx->flag &= ~JFS_ERR_MASK; ctx->flag |= result.uint_32; break; #ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: ctx->flag |= JFS_USRQUOTA; break; case Opt_grpquota: ctx->flag |= JFS_GRPQUOTA; break; #else case Opt_usrquota: case Opt_grpquota: case Opt_quota: pr_err("JFS: quota operations not supported\n"); break; #endif case Opt_uid: ctx->uid = result.uid; break; case Opt_gid: ctx->gid = result.gid; break; case Opt_umask: if (result.uint_32 & ~0777) { pr_err("JFS: Invalid value of umask\n"); return -EINVAL; } ctx->umask = result.uint_32; break; case Opt_discard: /* if set to 1, even copying files will cause * trimming :O * -> user has more control over the online trimming */ ctx->minblks_trim = 64; ctx->flag |= JFS_DISCARD; break; case Opt_nodiscard: ctx->flag &= ~JFS_DISCARD; break; case Opt_discard_minblk: ctx->minblks_trim = result.uint_32; ctx->flag |= JFS_DISCARD; break; default: return -EINVAL; } return 0; } static int jfs_reconfigure(struct fs_context *fc) { struct jfs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; int readonly = fc->sb_flags & SB_RDONLY; int rc = 0; int flag = ctx->flag; int ret; sync_filesystem(sb); /* Transfer results of parsing to the sbi */ JFS_SBI(sb)->flag = ctx->flag; JFS_SBI(sb)->uid = ctx->uid; JFS_SBI(sb)->gid = ctx->gid; JFS_SBI(sb)->umask = ctx->umask; JFS_SBI(sb)->minblks_trim = ctx->minblks_trim; if (ctx->nls_map != (void *) -1) { unload_nls(JFS_SBI(sb)->nls_tab); JFS_SBI(sb)->nls_tab = ctx->nls_map; } ctx->nls_map = NULL; if (ctx->resize) { if (sb_rdonly(sb)) { pr_err("JFS: resize requires volume to be mounted read-write\n"); return -EROFS; } if (!ctx->newLVSize) { ctx->newLVSize = sb_bdev_nr_blocks(sb); if (ctx->newLVSize == 0) pr_err("JFS: Cannot determine volume size\n"); } rc = jfs_extendfs(sb, ctx->newLVSize, 0); if (rc) return rc; } if (sb_rdonly(sb) && !readonly) { /* * Invalidate any previously read metadata. 
fsck may have * changed the on-disk data since we mounted r/o */ truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); JFS_SBI(sb)->flag = flag; ret = jfs_mount_rw(sb, 1); /* mark the fs r/w for quota activity */ sb->s_flags &= ~SB_RDONLY; dquot_resume(sb, -1); return ret; } if (!sb_rdonly(sb) && readonly) { rc = dquot_suspend(sb, -1); if (rc < 0) return rc; rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; return rc; } if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) { if (!sb_rdonly(sb)) { rc = jfs_umount_rw(sb); if (rc) return rc; JFS_SBI(sb)->flag = flag; ret = jfs_mount_rw(sb, 1); return ret; } } JFS_SBI(sb)->flag = flag; return 0; } static int jfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct jfs_context *ctx = fc->fs_private; int silent = fc->sb_flags & SB_SILENT; struct jfs_sb_info *sbi; struct inode *inode; int rc; int ret = -EINVAL; jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); sbi = kzalloc(sizeof(struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; sb->s_max_links = JFS_LINK_MAX; sb->s_time_min = 0; sb->s_time_max = U32_MAX; sbi->sb = sb; /* Transfer results of parsing to the sbi */ sbi->flag = ctx->flag; sbi->uid = ctx->uid; sbi->gid = ctx->gid; sbi->umask = ctx->umask; if (ctx->nls_map != (void *) -1) { unload_nls(sbi->nls_tab); sbi->nls_tab = ctx->nls_map; } ctx->nls_map = NULL; if (sbi->flag & JFS_DISCARD) { if (!bdev_max_discard_sectors(sb->s_bdev)) { pr_err("JFS: discard option not supported on device\n"); sbi->flag &= ~JFS_DISCARD; } else { sbi->minblks_trim = ctx->minblks_trim; } } #ifdef CONFIG_JFS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif if (ctx->resize) { pr_err("resize option for remount only\n"); goto out_unload; } /* * Initialize blocksize to 4K. */ sb_set_blocksize(sb, PSIZE); /* * Set method vectors. */ sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; sb->s_xattr = jfs_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; sb->s_qcop = &jfs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* * Initialize direct-mapping inode/address-space */ inode = new_inode(sb); if (inode == NULL) { ret = -ENOMEM; goto out_unload; } inode->i_size = bdev_nr_bytes(sb->s_bdev); inode->i_mapping->a_ops = &jfs_metapage_aops; inode_fake_hash(inode); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; rc = jfs_mount(sb); if (rc) { if (!silent) jfs_err("jfs_mount failed w/return code = %d", rc); goto out_mount_failed; } if (sb_rdonly(sb)) sbi->log = NULL; else { rc = jfs_mount_rw(sb, 0); if (rc) { if (!silent) { jfs_err("jfs_mount_rw failed, return code = %d", rc); } goto out_no_rw; } } sb->s_magic = JFS_SUPER_MAGIC; if (sbi->mntflag & JFS_OS2) set_default_d_op(sb, &jfs_ci_dentry_operations); inode = jfs_iget(sb, ROOT_I); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out_no_rw; } sb->s_root = d_make_root(inode); if (!sb->s_root) goto out_no_root; /* logical blocks are represented by 40 bits in pxd_t, etc. 
* and page cache is indexed by long */ sb->s_maxbytes = min(((loff_t)sb->s_blocksize) << 40, MAX_LFS_FILESIZE); sb->s_time_gran = 1; return 0; out_no_root: jfs_err("jfs_read_super: get root dentry failed"); out_no_rw: rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); out_mount_failed: filemap_write_and_wait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); sbi->direct_inode = NULL; out_unload: unload_nls(sbi->nls_tab); kfree(sbi); return ret; } static int jfs_freeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; int rc = 0; if (!sb_rdonly(sb)) { txQuiesce(sb); rc = lmLogShutdown(log); if (rc) { jfs_error(sb, "lmLogShutdown failed\n"); /* let operations fail rather than hang */ txResume(sb); return rc; } rc = updateSuper(sb, FM_CLEAN); if (rc) { jfs_err("jfs_freeze: updateSuper failed"); /* * Don't fail here. Everything succeeded except * marking the superblock clean, so there's really * no harm in leaving it frozen for now. */ } } return 0; } static int jfs_unfreeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; int rc = 0; if (!sb_rdonly(sb)) { rc = updateSuper(sb, FM_MOUNT); if (rc) { jfs_error(sb, "updateSuper failed\n"); goto out; } rc = lmLogInit(log); if (rc) jfs_error(sb, "lmLogInit failed\n"); out: txResume(sb); } return rc; } static int jfs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, jfs_fill_super); } static int jfs_sync_fs(struct super_block *sb, int wait) { struct jfs_log *log = JFS_SBI(sb)->log; /* log == NULL indicates read-only mount */ if (log) { /* * Write quota structures to quota file, sync_blockdev() will * write them to disk later */ dquot_writeback_dquots(sb, -1); jfs_flush_journal(log, wait); jfs_syncpt(log, 0); } return 0; } static int jfs_show_options(struct seq_file *seq, struct dentry *root) { struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); if (uid_valid(sbi->uid)) seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid)); if (gid_valid(sbi->gid)) seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid)); if (sbi->umask != -1) seq_printf(seq, ",umask=%03o", sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); if (sbi->flag & JFS_DISCARD) seq_printf(seq, ",discard=%u", sbi->minblks_trim); if (sbi->nls_tab) seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); if (sbi->flag & JFS_ERR_CONTINUE) seq_printf(seq, ",errors=continue"); if (sbi->flag & JFS_ERR_PANIC) seq_printf(seq, ",errors=panic"); #ifdef CONFIG_QUOTA if (sbi->flag & JFS_USRQUOTA) seq_puts(seq, ",usrquota"); if (sbi->flag & JFS_GRPQUOTA) seq_puts(seq, ",grpquota"); #endif return 0; } #ifdef CONFIG_QUOTA /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... 
As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; struct buffer_head tmp_bh; struct buffer_head *bh; loff_t i_size = i_size_read(inode); if (off > i_size) return 0; if (off+len > i_size) len = i_size-off; toread = len; while (toread > 0) { tocopy = min_t(size_t, sb->s_blocksize - offset, toread); tmp_bh.b_state = 0; tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 0); if (err) return err; if (!buffer_mapped(&tmp_bh)) /* A hole? */ memset(data, 0, tocopy); else { bh = sb_bread(sb, tmp_bh.b_blocknr); if (!bh) return -EIO; memcpy(data, bh->b_data+offset, tocopy); brelse(bh); } offset = 0; toread -= tocopy; data += tocopy; blk++; } return len; } /* Write to quotafile */ static ssize_t jfs_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; sector_t blk = off >> sb->s_blocksize_bits; int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; size_t towrite = len; struct buffer_head tmp_bh; struct buffer_head *bh; inode_lock(inode); while (towrite > 0) { tocopy = min_t(size_t, sb->s_blocksize - offset, towrite); tmp_bh.b_state = 0; tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 1); if (err) goto out; if (offset || tocopy != sb->s_blocksize) bh = sb_bread(sb, tmp_bh.b_blocknr); else bh = sb_getblk(sb, tmp_bh.b_blocknr); if (!bh) { err = -EIO; goto out; } lock_buffer(bh); memcpy(bh->b_data+offset, data, tocopy); flush_dcache_folio(bh->b_folio); set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); brelse(bh); offset = 0; towrite -= tocopy; data += tocopy; blk++; } out: if (len == towrite) { inode_unlock(inode); return err; } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); inode_unlock(inode); return len - towrite; } static struct dquot __rcu **jfs_get_dquots(struct inode *inode) { return JFS_IP(inode)->i_dquot; } static int jfs_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int err; struct inode *inode; err = dquot_quota_on(sb, type, format_id, path); if (err) return err; inode = d_inode(path->dentry); inode_lock(inode); JFS_IP(inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL; inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, S_NOATIME | S_IMMUTABLE); inode_unlock(inode); mark_inode_dirty(inode); return 0; } static int jfs_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; int err; if (!inode || !igrab(inode)) goto out; err = dquot_quota_off(sb, type); if (err) goto out_put; inode_lock(inode); JFS_IP(inode)->mode2 &= ~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL); inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); inode_unlock(inode); mark_inode_dirty(inode); out_put: iput(inode); return err; out: return dquot_quota_off(sb, type); } #endif static const struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .free_inode = jfs_free_inode, .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, .evict_inode = jfs_evict_inode, 
.put_super = jfs_put_super, .sync_fs = jfs_sync_fs, .freeze_fs = jfs_freeze, .unfreeze_fs = jfs_unfreeze, .statfs = jfs_statfs, .show_options = jfs_show_options, #ifdef CONFIG_QUOTA .quota_read = jfs_quota_read, .quota_write = jfs_quota_write, .get_dquots = jfs_get_dquots, #endif }; static const struct export_operations jfs_export_operations = { .encode_fh = generic_encode_ino32_fh, .fh_to_dentry = jfs_fh_to_dentry, .fh_to_parent = jfs_fh_to_parent, .get_parent = jfs_get_parent, }; static void jfs_init_options(struct fs_context *fc, struct jfs_context *ctx) { if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { struct super_block *sb = fc->root->d_sb; /* Copy over current option values and mount flags */ ctx->uid = JFS_SBI(sb)->uid; ctx->gid = JFS_SBI(sb)->gid; ctx->umask = JFS_SBI(sb)->umask; ctx->nls_map = (void *)-1; ctx->minblks_trim = JFS_SBI(sb)->minblks_trim; ctx->flag = JFS_SBI(sb)->flag; } else { /* * Initialize the mount flag and determine the default * error handler */ ctx->flag = JFS_ERR_REMOUNT_RO; ctx->uid = INVALID_UID; ctx->gid = INVALID_GID; ctx->umask = -1; ctx->nls_map = (void *)-1; } } static void jfs_free_fc(struct fs_context *fc) { struct jfs_context *ctx = fc->fs_private; if (ctx->nls_map != (void *) -1) unload_nls(ctx->nls_map); kfree(ctx); } static const struct fs_context_operations jfs_context_ops = { .parse_param = jfs_parse_param, .get_tree = jfs_get_tree, .reconfigure = jfs_reconfigure, .free = jfs_free_fc, }; static int jfs_init_fs_context(struct fs_context *fc) { struct jfs_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; jfs_init_options(fc, ctx); fc->fs_private = ctx; fc->ops = &jfs_context_ops; return 0; } static struct file_system_type jfs_fs_type = { .owner = THIS_MODULE, .name = "jfs", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = jfs_init_fs_context, .parameters = jfs_param_spec, }; MODULE_ALIAS_FS("jfs"); static void init_once(void *foo) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); INIT_LIST_HEAD(&jfs_ip->anon_inode_list); init_rwsem(&jfs_ip->rdwrlock); mutex_init(&jfs_ip->commit_mutex); init_rwsem(&jfs_ip->xattr_sem); spin_lock_init(&jfs_ip->ag_lock); jfs_ip->active_ag = -1; inode_init_once(&jfs_ip->vfs_inode); } static int __init init_jfs_fs(void) { int i; int rc; jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, offsetof(struct jfs_inode_info, i_inline_all), sizeof_field(struct jfs_inode_info, i_inline_all), init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; /* * Metapage initialization */ rc = metapage_init(); if (rc) { jfs_err("metapage_init failed w/rc = %d", rc); goto free_slab; } /* * Transaction Manager initialization */ rc = txInit(); if (rc) { jfs_err("txInit failed w/rc = %d", rc); goto free_metapage; } /* * I/O completion thread (endio) */ jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO"); if (IS_ERR(jfsIOthread)) { rc = PTR_ERR(jfsIOthread); jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto end_txmngr; } if (commit_threads < 1) commit_threads = num_online_cpus(); if (commit_threads > MAX_COMMIT_THREADS) commit_threads = MAX_COMMIT_THREADS; for (i = 0; i < commit_threads; i++) { jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit"); if (IS_ERR(jfsCommitThread[i])) { rc = PTR_ERR(jfsCommitThread[i]); jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); commit_threads = i; goto kill_committask; } } jfsSyncThread = 
kthread_run(jfs_sync, NULL, "jfsSync"); if (IS_ERR(jfsSyncThread)) { rc = PTR_ERR(jfsSyncThread); jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto kill_committask; } #ifdef PROC_FS_JFS jfs_proc_init(); #endif rc = register_filesystem(&jfs_fs_type); if (!rc) return 0; #ifdef PROC_FS_JFS jfs_proc_clean(); #endif kthread_stop(jfsSyncThread); kill_committask: for (i = 0; i < commit_threads; i++) kthread_stop(jfsCommitThread[i]); kthread_stop(jfsIOthread); end_txmngr: txExit(); free_metapage: metapage_exit(); free_slab: kmem_cache_destroy(jfs_inode_cachep); return rc; } static void __exit exit_jfs_fs(void) { int i; jfs_info("exit_jfs_fs called"); txExit(); metapage_exit(); kthread_stop(jfsIOthread); for (i = 0; i < commit_threads; i++) kthread_stop(jfsCommitThread[i]); kthread_stop(jfsSyncThread); #ifdef PROC_FS_JFS jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } module_init(init_jfs_fs) module_exit(exit_jfs_fs)
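/*
 * A standalone sketch of the free-inode estimate computed in jfs_statfs()
 * above. JFS allocates inodes dynamically, so rather than reporting the
 * currently allocated counts it guesses how many more inodes the free
 * blocks could still hold: numinos + ((free_blocks >> l2nbperiext)
 * << L2INOSPEREXT), capped at 32 bits. The parameter names and the sample
 * values in main() are illustrative; L2INOSPEREXT = 5 (32 inodes per inode
 * extent) is assumed from jfs_filsys.h.
 */
#include <stdio.h>
#include <stdint.h>

#define L2INOSPEREXT 5	/* log2(inodes per inode extent) */

static int64_t jfs_max_inodes_estimate(int64_t numinos, int64_t free_blocks,
				       int l2nbperiext)
{
	/* inode extents that could still be carved out of the free space */
	int64_t extents = free_blocks >> l2nbperiext;
	int64_t guess = numinos + (extents << L2INOSPEREXT);

	/* f_files is reported as a 32-bit quantity, so clamp the guess */
	return guess < (int64_t)0xffffffffLL ? guess : (int64_t)0xffffffffLL;
}

int main(void)
{
	/* e.g. 1000 inodes in use, 4096 free blocks, 2 blocks per extent */
	printf("%lld\n", (long long)jfs_max_inodes_estimate(1000, 4096, 1));
	return 0;
}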
// SPDX-License-Identifier: GPL-2.0 /* Multipath TCP token management * Copyright (c) 2017 - 2019, Intel Corporation.
* * Note: This code is based on mptcp_ctrl.c from multipath-tcp.org, * authored by: * * Sébastien Barré <sebastien.barre@uclouvain.be> * Christoph Paasch <christoph.paasch@uclouvain.be> * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> * Gregory Detal <gregory.detal@uclouvain.be> * Fabien Duchêne <fabien.duchene@uclouvain.be> * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> * Lavkesh Lahngir <lavkesh51@gmail.com> * Andreas Ripke <ripke@neclab.eu> * Vlad Dogaru <vlad.dogaru@intel.com> * Octavian Purdila <octavian.purdila@intel.com> * John Ronan <jronan@tssg.org> * Catalin Nicutar <catalin.nicutar@gmail.com> * Brandon Heller <brandonh@stanford.edu> */ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/memblock.h> #include <linux/ip.h> #include <linux/tcp.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/protocol.h> #include <net/mptcp.h> #include "protocol.h" #define TOKEN_MAX_CHAIN_LEN 4 struct token_bucket { spinlock_t lock; int chain_len; struct hlist_nulls_head req_chain; struct hlist_nulls_head msk_chain; }; static struct token_bucket *token_hash __read_mostly; static unsigned int token_mask __read_mostly; static struct token_bucket *token_bucket(u32 token) { return &token_hash[token & token_mask]; } /* called with bucket lock held */ static struct mptcp_subflow_request_sock * __token_lookup_req(struct token_bucket *t, u32 token) { struct mptcp_subflow_request_sock *req; struct hlist_nulls_node *pos; hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node) if (req->token == token) return req; return NULL; } /* called with bucket lock held */ static struct mptcp_sock * __token_lookup_msk(struct token_bucket *t, u32 token) { struct hlist_nulls_node *pos; struct sock *sk; sk_nulls_for_each_rcu(sk, pos, &t->msk_chain) if (mptcp_sk(sk)->token == token) return mptcp_sk(sk); return NULL; } static bool __token_bucket_busy(struct token_bucket *t, u32 token) { return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN || __token_lookup_req(t, token) || __token_lookup_msk(t, token); } static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) { /* we might consider a faster version that computes the key as a * hash of some information available in the MPTCP socket. Use * random data at the moment, as it's probably the safest option * in case multiple sockets are opened in different namespaces at * the same time. */ get_random_bytes(key, sizeof(u64)); mptcp_crypto_key_sha(*key, token, idsn); } /** * mptcp_token_new_request - create new key/idsn/token for subflow_request * @req: the request socket * * This function is called when a new mptcp connection is coming in. * * It creates a unique token to identify the new mptcp connection, * a secret local key and the initial data sequence number (idsn). * * Returns 0 on success. 
*/ int mptcp_token_new_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct token_bucket *bucket; u32 token; mptcp_crypto_key_sha(subflow_req->local_key, &subflow_req->token, &subflow_req->idsn); pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", req, subflow_req->local_key, subflow_req->token, subflow_req->idsn); token = subflow_req->token; bucket = token_bucket(token); spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, token)) { spin_unlock_bh(&bucket->lock); return -EBUSY; } hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain); bucket->chain_len++; spin_unlock_bh(&bucket->lock); return 0; } /** * mptcp_token_new_connect - create new key/idsn/token for subflow * @ssk: the socket that will initiate a connection * * This function is called when a new outgoing mptcp connection is * initiated. * * It creates a unique token to identify the new mptcp connection, * a secret local key and the initial data sequence number (idsn). * * On success, the mptcp connection can be found again using * the computed token at a later time, this is needed to process * join requests. * * returns 0 on success. */ int mptcp_token_new_connect(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); int retries = MPTCP_TOKEN_MAX_RETRIES; struct sock *sk = subflow->conn; struct token_bucket *bucket; again: mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token, &subflow->idsn); bucket = token_bucket(subflow->token); spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, subflow->token)) { spin_unlock_bh(&bucket->lock); if (!--retries) return -EBUSY; goto again; } pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n", ssk, subflow->local_key, subflow->token, subflow->idsn); WRITE_ONCE(msk->token, subflow->token); __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); bucket->chain_len++; spin_unlock_bh(&bucket->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } /** * mptcp_token_accept - replace a req sk with full sock in token hash * @req: the request socket to be removed * @msk: the just cloned socket linked to the new connection * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. 
*/ void mptcp_token_accept(struct mptcp_subflow_request_sock *req, struct mptcp_sock *msk) { struct mptcp_subflow_request_sock *pos; struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); bucket = token_bucket(req->token); spin_lock_bh(&bucket->lock); /* pedantic lookup check for the moved token */ pos = __token_lookup_req(bucket, req->token); if (!WARN_ON_ONCE(pos != req)) hlist_nulls_del_init_rcu(&req->token_node); __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); spin_unlock_bh(&bucket->lock); } bool mptcp_token_exists(u32 token) { struct hlist_nulls_node *pos; struct token_bucket *bucket; struct mptcp_sock *msk; struct sock *sk; rcu_read_lock(); bucket = token_bucket(token); again: sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { msk = mptcp_sk(sk); if (READ_ONCE(msk->token) == token) goto found; } if (get_nulls_value(pos) != (token & token_mask)) goto again; rcu_read_unlock(); return false; found: rcu_read_unlock(); return true; } /** * mptcp_token_get_sock - retrieve mptcp connection sock using its token * @net: restrict to this namespace * @token: token of the mptcp connection to retrieve * * This function returns the mptcp connection structure with the given token. * A reference count on the mptcp socket returned is taken. * * returns NULL if no connection with the given token value exists. */ struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token) { struct hlist_nulls_node *pos; struct token_bucket *bucket; struct mptcp_sock *msk; struct sock *sk; rcu_read_lock(); bucket = token_bucket(token); again: sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { msk = mptcp_sk(sk); if (READ_ONCE(msk->token) != token || !net_eq(sock_net(sk), net)) continue; if (!refcount_inc_not_zero(&sk->sk_refcnt)) goto not_found; if (READ_ONCE(msk->token) != token || !net_eq(sock_net(sk), net)) { sock_put(sk); goto again; } goto found; } if (get_nulls_value(pos) != (token & token_mask)) goto again; not_found: msk = NULL; found: rcu_read_unlock(); return msk; } EXPORT_SYMBOL_GPL(mptcp_token_get_sock); /** * mptcp_token_iter_next - iterate over the token container from given pos * @net: namespace to be iterated * @s_slot: start slot number * @s_num: start number inside the given lock * * This function returns the first mptcp connection structure found inside the * token container starting from the specified position, or NULL. * * On successful iteration, the iterator is moved to the next position and * a reference to the returned socket is acquired. */ struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num) { struct mptcp_sock *ret = NULL; struct hlist_nulls_node *pos; int slot, num = 0; for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) { struct token_bucket *bucket = &token_hash[slot]; struct sock *sk; num = 0; if (hlist_nulls_empty(&bucket->msk_chain)) continue; rcu_read_lock(); sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { ++num; if (!net_eq(sock_net(sk), net)) continue; if (num <= *s_num) continue; if (!refcount_inc_not_zero(&sk->sk_refcnt)) continue; if (!net_eq(sock_net(sk), net)) { sock_put(sk); continue; } ret = mptcp_sk(sk); rcu_read_unlock(); goto out; } rcu_read_unlock(); } out: *s_slot = slot; *s_num = num; return ret; } EXPORT_SYMBOL_GPL(mptcp_token_iter_next); /** * mptcp_token_destroy_request - remove mptcp connection/token * @req: mptcp request socket dropping the token * * Remove the token associated to @req. 
*/ void mptcp_token_destroy_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_subflow_request_sock *pos; struct token_bucket *bucket; if (hlist_nulls_unhashed(&subflow_req->token_node)) return; bucket = token_bucket(subflow_req->token); spin_lock_bh(&bucket->lock); pos = __token_lookup_req(bucket, subflow_req->token); if (!WARN_ON_ONCE(pos != subflow_req)) { hlist_nulls_del_init_rcu(&pos->token_node); bucket->chain_len--; } spin_unlock_bh(&bucket->lock); } /** * mptcp_token_destroy - remove mptcp connection/token * @msk: mptcp connection dropping the token * * Remove the token associated to @msk */ void mptcp_token_destroy(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; struct mptcp_sock *pos; if (sk_unhashed((struct sock *)msk)) return; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); bucket = token_bucket(msk->token); spin_lock_bh(&bucket->lock); pos = __token_lookup_msk(bucket, msk->token); if (!WARN_ON_ONCE(pos != msk)) { __sk_nulls_del_node_init_rcu((struct sock *)pos); bucket->chain_len--; } spin_unlock_bh(&bucket->lock); WRITE_ONCE(msk->token, 0); } void __init mptcp_token_init(void) { int i; token_hash = alloc_large_system_hash("MPTCP token", sizeof(struct token_bucket), 0, 20,/* one slot per 1MB of memory */ HASH_ZERO, NULL, &token_mask, 0, 64 * 1024); for (i = 0; i < token_mask + 1; ++i) { INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i); INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i); spin_lock_init(&token_hash[i].lock); } } #if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST) EXPORT_SYMBOL_GPL(mptcp_token_new_request); EXPORT_SYMBOL_GPL(mptcp_token_new_connect); EXPORT_SYMBOL_GPL(mptcp_token_accept); EXPORT_SYMBOL_GPL(mptcp_token_destroy_request); EXPORT_SYMBOL_GPL(mptcp_token_destroy); #endif
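/*
 * A minimal user-space sketch of the bucket selection and "busy" test used
 * by the token code above: the table size is a power of two, so
 * (token & token_mask) picks a bucket, and a candidate token is rejected
 * when it is zero, when the bucket already holds TOKEN_MAX_CHAIN_LEN
 * entries, or when the same value is already chained there. The flat array
 * bucket and linear scan below are illustrative stand-ins for the kernel's
 * spinlock-protected hlist_nulls chains; none of these names are kernel API.
 */
#include <stdint.h>
#include <stdbool.h>

#define CHAIN_MAX 4			/* mirrors TOKEN_MAX_CHAIN_LEN */
#define TABLE_BITS 4
#define TABLE_MASK ((1u << TABLE_BITS) - 1)

struct bucket {
	int len;
	uint32_t tokens[CHAIN_MAX];
};

static struct bucket table[TABLE_MASK + 1];

static bool token_busy(uint32_t token)
{
	struct bucket *b = &table[token & TABLE_MASK];
	int i;

	if (!token || b->len >= CHAIN_MAX)
		return true;		/* zero token, or chain already full */
	for (i = 0; i < b->len; i++)
		if (b->tokens[i] == token)
			return true;	/* collision with a live token */
	return false;
}

int main(void)
{
	/* install token 5, then check that 5 is busy and 6 is free */
	table[5 & TABLE_MASK].tokens[0] = 5;
	table[5 & TABLE_MASK].len = 1;
	return (token_busy(5) && !token_busy(6)) ? 0 : 1;
}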
2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 
3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 
3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * OV519 driver
 *
 * Copyright (C) 2008-2011 Jean-François Moine <moinejf@free.fr>
 * Copyright (C) 2009 Hans de Goede <hdegoede@redhat.com>
 *
 * This module is adapted from the ov51x-jpeg package, which itself
 * was adapted from the ov511 driver.
 *
 * Original copyright for the ov511 driver is:
 *
 * Copyright (c) 1999-2006 Mark W. McClelland
 * Support for OV519, OV8610 Copyright (c) 2003 Joerg Heckenbach
 * Many improvements by Bret Wallach <bwallac1@san.rr.com>
 * Color fixes by Orion Sky Lawlor <olawlor@acm.org> (2/26/2000)
 * OV7620 fixes by Charl P. Botha <cpbotha@ieee.org>
 * Changes by Claudio Matsuoka <claudio@conectiva.com>
 *
 * ov51x-jpeg original copyright is:
 *
 * Copyright (c) 2004-2007 Romain Beauxis <toots@rastageeks.org>
 * Support for OV7670 sensors was contributed by Sam Skipsey <aoanla@yahoo.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define MODULE_NAME "ov519"

#include <linux/input.h>
#include "gspca.h"

/* The jpeg_hdr is used by w996Xcf only */
/* The CONEX_CAM define for jpeg.h needs renaming, now it's used here too */
#define CONEX_CAM
#include "jpeg.h"

MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>");
MODULE_DESCRIPTION("OV519 USB Camera Driver");
MODULE_LICENSE("GPL");

/* global parameters */
static int frame_rate;

/* Number of times to retry a failed I2C transaction. Increase this if you
 * are getting "Failed to read sensor ID..." */
static int i2c_detect_tries = 10;

/* ov519 device descriptor */
struct sd {
	struct gspca_dev gspca_dev;	/* !! must be the first item */

	struct v4l2_ctrl *jpegqual;
	struct v4l2_ctrl *freq;
	struct {	/* h/vflip control cluster */
		struct v4l2_ctrl *hflip;
		struct v4l2_ctrl *vflip;
	};
	struct {	/* autobrightness/brightness control cluster */
		struct v4l2_ctrl *autobright;
		struct v4l2_ctrl *brightness;
	};

	u8 revision;
	u8 packet_nr;

	char bridge;
#define BRIDGE_OV511		0
#define BRIDGE_OV511PLUS	1
#define BRIDGE_OV518		2
#define BRIDGE_OV518PLUS	3
#define BRIDGE_OV519		4	/* = ov530 */
#define BRIDGE_OVFX2		5
#define BRIDGE_W9968CF		6
#define BRIDGE_MASK		7

	char invert_led;
#define BRIDGE_INVERT_LED	8

	char snapshot_pressed;
	char snapshot_needs_reset;

	/* Determined by sensor type */
	u8 sif;

#define QUALITY_MIN 50
#define QUALITY_MAX 70
#define QUALITY_DEF 50

	u8 stopped;		/* Streaming is temporarily paused */
	u8 first_frame;

	u8 frame_rate;		/* current Framerate */
	u8 clockdiv;		/* clockdiv override */

	s8 sensor;		/* Type of image sensor chip (SEN_*) */

	u8 sensor_addr;
	u16 sensor_width;
	u16 sensor_height;
	s16 sensor_reg_cache[256];

	u8 jpeg_hdr[JPEG_HDR_SZ];
};

enum sensors {
	SEN_OV2610,
	SEN_OV2610AE,
	SEN_OV3610,
	SEN_OV6620,
	SEN_OV6630,
	SEN_OV66308AF,
	SEN_OV7610,
	SEN_OV7620,
	SEN_OV7620AE,
	SEN_OV7640,
	SEN_OV7648,
	SEN_OV7660,
	SEN_OV7670,
	SEN_OV76BE,
	SEN_OV8610,
	SEN_OV9600,
};

/* Note this is a bit of a hack, but the w9968cf driver needs the code for
   all the ov sensors which is already present here. When we have the time
   we really should move the sensor drivers to v4l2 sub drivers. */
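/*
 * Editor's illustration (not from the original file): the kind of retry
 * loop that consumes i2c_detect_tries during probing.  read_sensor_id()
 * is a hypothetical placeholder, not the driver's real helper.
 */
static int detect_sensor_sketch(struct sd *sd)
{
	int i;

	for (i = 0; i < i2c_detect_tries; i++) {
		int id = read_sensor_id(sd);	/* hypothetical helper */

		if (id >= 0)
			return id;	/* sensor answered, detection done */
	}
	pr_err("Failed to read sensor ID after %d tries\n", i2c_detect_tries);
	return -ENODEV;
}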
*/ #include "w996Xcf.c" /* table of the disabled controls */ struct ctrl_valid { unsigned int has_brightness:1; unsigned int has_contrast:1; unsigned int has_exposure:1; unsigned int has_autogain:1; unsigned int has_sat:1; unsigned int has_hvflip:1; unsigned int has_autobright:1; unsigned int has_freq:1; }; static const struct ctrl_valid valid_controls[] = { [SEN_OV2610] = { .has_exposure = 1, .has_autogain = 1, }, [SEN_OV2610AE] = { .has_exposure = 1, .has_autogain = 1, }, [SEN_OV3610] = { /* No controls */ }, [SEN_OV6620] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV6630] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV66308AF] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV7610] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV7620] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV7620AE] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV7640] = { .has_brightness = 1, .has_sat = 1, .has_freq = 1, }, [SEN_OV7648] = { .has_brightness = 1, .has_sat = 1, .has_freq = 1, }, [SEN_OV7660] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_hvflip = 1, .has_freq = 1, }, [SEN_OV7670] = { .has_brightness = 1, .has_contrast = 1, .has_hvflip = 1, .has_freq = 1, }, [SEN_OV76BE] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, .has_freq = 1, }, [SEN_OV8610] = { .has_brightness = 1, .has_contrast = 1, .has_sat = 1, .has_autobright = 1, }, [SEN_OV9600] = { .has_exposure = 1, .has_autogain = 1, }, }; static const struct v4l2_pix_format ov519_vga_mode[] = { {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 0}, }; static const struct v4l2_pix_format ov519_sif_mode[] = { {160, 120, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 3}, {176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 2}, {352, 288, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 0}, }; /* Note some of the sizeimage values for the ov511 / ov518 may seem larger then necessary, however they need to be this big as the ov511 / ov518 always fills the entire isoc frame, using 0 padding bytes when it doesn't have any data. So with low framerates the amount of data transferred can become quite large (libv4l will remove all the 0 padding in userspace). 
static const struct v4l2_pix_format ov518_vga_mode[] = {
	{320, 240, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 320, .sizeimage = 320 * 240 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 1},
	{640, 480, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 640, .sizeimage = 640 * 480 * 2,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 0},
};
static const struct v4l2_pix_format ov518_sif_mode[] = {
	{160, 120, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 160, .sizeimage = 70000,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 3},
	{176, 144, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 176, .sizeimage = 70000,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 1},
	{320, 240, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 320, .sizeimage = 320 * 240 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 2},
	{352, 288, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
		.bytesperline = 352, .sizeimage = 352 * 288 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 0},
};
static const struct v4l2_pix_format ov511_vga_mode[] = {
	{320, 240, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 320, .sizeimage = 320 * 240 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 1},
	{640, 480, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 640, .sizeimage = 640 * 480 * 2,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 0},
};
static const struct v4l2_pix_format ov511_sif_mode[] = {
	{160, 120, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 160, .sizeimage = 70000,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 3},
	{176, 144, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 176, .sizeimage = 70000,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 1},
	{320, 240, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 320, .sizeimage = 320 * 240 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 2},
	{352, 288, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
		.bytesperline = 352, .sizeimage = 352 * 288 * 3,
		.colorspace = V4L2_COLORSPACE_JPEG, .priv = 0},
};
static const struct v4l2_pix_format ovfx2_ov2610_mode[] = {
	{800, 600, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 800, .sizeimage = 800 * 600,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 1},
	{1600, 1200, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 1600, .sizeimage = 1600 * 1200,
		.colorspace = V4L2_COLORSPACE_SRGB},
};
static const struct v4l2_pix_format ovfx2_ov3610_mode[] = {
	{640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 640, .sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 1},
	{800, 600, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 800, .sizeimage = 800 * 600,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 1},
	{1024, 768, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 1024, .sizeimage = 1024 * 768,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 1},
	{1600, 1200, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 1600, .sizeimage = 1600 * 1200,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 0},
	{2048, 1536, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 2048, .sizeimage = 2048 * 1536,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 0},
};
static const struct v4l2_pix_format ovfx2_ov9600_mode[] = {
	{640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 640, .sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB, .priv = 1},
	{1280, 1024, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
		.bytesperline = 1280, .sizeimage = 1280 * 1024,
		.colorspace = V4L2_COLORSPACE_SRGB},
};

/* Registers common to OV511 / OV518 */
#define R51x_FIFO_PSIZE 0x30	/* 2 bytes wide w/ OV518(+) */
#define R51x_SYS_RESET 0x50	/* Reset type flags */
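/*
 * Editor's note on the tables above: the largest ovfx2_ov2610_mode and
 * ovfx2_ov9600_mode entries omit .priv.  C zero-initializes unnamed
 * members in a designated initializer, so they behave exactly as if
 * .priv = 0 had been written, matching the explicit .priv = 0 on the
 * largest modes of the other tables.
 */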
#define OV511_RESET_OMNICE 0x08
#define R51x_SYS_INIT 0x53
#define R51x_SYS_SNAP 0x52
#define R51x_SYS_CUST_ID 0x5f
#define R51x_COMP_LUT_BEGIN 0x80

/* OV511 Camera interface register numbers */
#define R511_CAM_DELAY 0x10
#define R511_CAM_EDGE 0x11
#define R511_CAM_PXCNT 0x12
#define R511_CAM_LNCNT 0x13
#define R511_CAM_PXDIV 0x14
#define R511_CAM_LNDIV 0x15
#define R511_CAM_UV_EN 0x16
#define R511_CAM_LINE_MODE 0x17
#define R511_CAM_OPTS 0x18
#define R511_SNAP_FRAME 0x19
#define R511_SNAP_PXCNT 0x1a
#define R511_SNAP_LNCNT 0x1b
#define R511_SNAP_PXDIV 0x1c
#define R511_SNAP_LNDIV 0x1d
#define R511_SNAP_UV_EN 0x1e
#define R511_SNAP_OPTS 0x1f
#define R511_DRAM_FLOW_CTL 0x20
#define R511_FIFO_OPTS 0x31
#define R511_I2C_CTL 0x40
#define R511_SYS_LED_CTL 0x55	/* OV511+ only */
#define R511_COMP_EN 0x78
#define R511_COMP_LUT_EN 0x79

/* OV518 Camera interface register numbers */
#define R518_GPIO_OUT 0x56	/* OV518(+) only */
#define R518_GPIO_CTL 0x57	/* OV518(+) only */

/* OV519 Camera interface register numbers */
#define OV519_R10_H_SIZE 0x10
#define OV519_R11_V_SIZE 0x11
#define OV519_R12_X_OFFSETL 0x12
#define OV519_R13_X_OFFSETH 0x13
#define OV519_R14_Y_OFFSETL 0x14
#define OV519_R15_Y_OFFSETH 0x15
#define OV519_R16_DIVIDER 0x16
#define OV519_R20_DFR 0x20
#define OV519_R25_FORMAT 0x25

/* OV519 System Controller register numbers */
#define OV519_R51_RESET1 0x51
#define OV519_R54_EN_CLK1 0x54
#define OV519_R57_SNAPSHOT 0x57
#define OV519_GPIO_DATA_OUT0 0x71
#define OV519_GPIO_IO_CTRL0 0x72

/*#define OV511_ENDPOINT_ADDRESS 1	 * Isoc endpoint number */

/*
 * The FX2 chip does not give us a zero length read at end of frame.
 * It does, however, give a short read at the end of a frame, if
 * necessary, rather than run two frames together.
 *
 * By choosing the right bulk transfer size, we are guaranteed to always
 * get a short read for the last read of each frame.  Frame sizes are
 * always a composite number (width * height, or a multiple) so if we
 * choose a prime number, we are guaranteed that the last read of a
 * frame will be short.
 *
 * But it isn't that easy: the 2.6 kernel requires a multiple of 4KB,
 * otherwise EOVERFLOW "babbling" errors occur.  I have not been able
 * to figure out why.  [PMiller]
 *
 * The constant (13 * 4096) is the largest "prime enough" number less
 * than 64KB.
 *
 * It isn't enough to know the number of bytes per frame, in case we
 * have data dropouts or buffer overruns (even though the FX2 double
 * buffers, there are some pretty strict real time constraints for
 * isochronous transfer for larger frame sizes).
 */
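/*
 * Editor's illustration making the "prime enough" argument concrete
 * (800x600 is just an example frame size):
 *
 *   13 * 4096 = 53248 bytes; 13 is prime and 4096 meets the kernel's
 *   4KB requirement, so a frame can only end exactly on a transfer
 *   boundary if its byte count is a multiple of 53248.  For a frame of
 *   800 * 600 = 480000 bytes: 480000 = 9 * 53248 + 768, so the tenth
 *   read returns just 768 bytes - the short read marking end of frame.
 */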
/*jfm: this value does not work for 800x600 - see isoc_init */
#define OVFX2_BULK_SIZE (13 * 4096)

/* I2C registers */
#define R51x_I2C_W_SID 0x41
#define R51x_I2C_SADDR_3 0x42
#define R51x_I2C_SADDR_2 0x43
#define R51x_I2C_R_SID 0x44
#define R51x_I2C_DATA 0x45
#define R518_I2C_CTL 0x47	/* OV518(+) only */
#define OVFX2_I2C_ADDR 0x00

/* I2C ADDRESSES */
#define OV7xx0_SID 0x42
#define OV_HIRES_SID 0x60	/* OV9xxx / OV2xxx / OV3xxx */
#define OV8xx0_SID 0xa0
#define OV6xx0_SID 0xc0

/* OV7610 registers */
#define OV7610_REG_GAIN 0x00	/* gain setting (5:0) */
#define OV7610_REG_BLUE 0x01	/* blue channel balance */
#define OV7610_REG_RED 0x02	/* red channel balance */
#define OV7610_REG_SAT 0x03	/* saturation */
#define OV8610_REG_HUE 0x04	/* 04 reserved */
#define OV7610_REG_CNT 0x05	/* Y contrast */
#define OV7610_REG_BRT 0x06	/* Y brightness */
#define OV7610_REG_COM_C 0x14	/* misc common regs */
#define OV7610_REG_ID_HIGH 0x1c	/* manufacturer ID MSB */
#define OV7610_REG_ID_LOW 0x1d	/* manufacturer ID LSB */
#define OV7610_REG_COM_I 0x29	/* misc settings */

/* OV7660 and OV7670 registers */
#define OV7670_R00_GAIN 0x00	/* Gain lower 8 bits (rest in vref) */
#define OV7670_R01_BLUE 0x01	/* blue gain */
#define OV7670_R02_RED 0x02	/* red gain */
#define OV7670_R03_VREF 0x03	/* Pieces of GAIN, VSTART, VSTOP */
#define OV7670_R04_COM1 0x04	/* Control 1 */
/*#define OV7670_R07_AECHH 0x07	 * AEC MS 5 bits */
#define OV7670_R0C_COM3 0x0c	/* Control 3 */
#define OV7670_R0D_COM4 0x0d	/* Control 4 */
#define OV7670_R0E_COM5 0x0e	/* All "reserved" */
#define OV7670_R0F_COM6 0x0f	/* Control 6 */
#define OV7670_R10_AECH 0x10	/* More bits of AEC value */
#define OV7670_R11_CLKRC 0x11	/* Clock control */
#define OV7670_R12_COM7 0x12	/* Control 7 */
#define OV7670_COM7_FMT_VGA 0x00
/*#define OV7670_COM7_YUV 0x00	 * YUV */
#define OV7670_COM7_FMT_QVGA 0x10	/* QVGA format */
#define OV7670_COM7_FMT_MASK 0x38
#define OV7670_COM7_RESET 0x80	/* Register reset */
#define OV7670_R13_COM8 0x13	/* Control 8 */
#define OV7670_COM8_AEC 0x01	/* Auto exposure enable */
#define OV7670_COM8_AWB 0x02	/* White balance enable */
#define OV7670_COM8_AGC 0x04	/* Auto gain enable */
#define OV7670_COM8_BFILT 0x20	/* Band filter enable */
#define OV7670_COM8_AECSTEP 0x40	/* Unlimited AEC step size */
#define OV7670_COM8_FASTAEC 0x80	/* Enable fast AGC/AEC */
#define OV7670_R14_COM9 0x14	/* Control 9 - gain ceiling */
#define OV7670_R15_COM10 0x15	/* Control 10 */
#define OV7670_R17_HSTART 0x17	/* Horiz start high bits */
#define OV7670_R18_HSTOP 0x18	/* Horiz stop high bits */
#define OV7670_R19_VSTART 0x19	/* Vert start high bits */
#define OV7670_R1A_VSTOP 0x1a	/* Vert stop high bits */
#define OV7670_R1E_MVFP 0x1e	/* Mirror / vflip */
#define OV7670_MVFP_VFLIP 0x10	/* vertical flip */
#define OV7670_MVFP_MIRROR 0x20	/* Mirror image */
#define OV7670_R24_AEW 0x24	/* AGC upper limit */
#define OV7670_R25_AEB 0x25	/* AGC lower limit */
#define OV7670_R26_VPT 0x26	/* AGC/AEC fast mode op region */
#define OV7670_R32_HREF 0x32	/* HREF pieces */
#define OV7670_R3A_TSLB 0x3a	/* lots of stuff */
#define OV7670_R3B_COM11 0x3b	/* Control 11 */
#define OV7670_COM11_EXP 0x02
#define OV7670_COM11_HZAUTO 0x10	/* Auto detect 50/60 Hz */
#define OV7670_R3C_COM12 0x3c	/* Control 12 */
#define OV7670_R3D_COM13 0x3d	/* Control 13 */
#define OV7670_COM13_GAMMA 0x80	/* Gamma enable */
#define OV7670_COM13_UVSAT 0x40	/* UV saturation auto adjustment */
#define OV7670_R3E_COM14 0x3e	/* Control 14 */
#define OV7670_R3F_EDGE 0x3f	/* Edge
enhancement factor */ #define OV7670_R40_COM15 0x40 /* Control 15 */ /*#define OV7670_COM15_R00FF 0xc0 * 00 to FF */ #define OV7670_R41_COM16 0x41 /* Control 16 */ #define OV7670_COM16_AWBGAIN 0x08 /* AWB gain enable */ /* end of ov7660 common registers */ #define OV7670_R55_BRIGHT 0x55 /* Brightness */ #define OV7670_R56_CONTRAS 0x56 /* Contrast control */ #define OV7670_R69_GFIX 0x69 /* Fix gain control */ /*#define OV7670_R8C_RGB444 0x8c * RGB 444 control */ #define OV7670_R9F_HAECC1 0x9f /* Hist AEC/AGC control 1 */ #define OV7670_RA0_HAECC2 0xa0 /* Hist AEC/AGC control 2 */ #define OV7670_RA5_BD50MAX 0xa5 /* 50hz banding step limit */ #define OV7670_RA6_HAECC3 0xa6 /* Hist AEC/AGC control 3 */ #define OV7670_RA7_HAECC4 0xa7 /* Hist AEC/AGC control 4 */ #define OV7670_RA8_HAECC5 0xa8 /* Hist AEC/AGC control 5 */ #define OV7670_RA9_HAECC6 0xa9 /* Hist AEC/AGC control 6 */ #define OV7670_RAA_HAECC7 0xaa /* Hist AEC/AGC control 7 */ #define OV7670_RAB_BD60MAX 0xab /* 60hz banding step limit */ struct ov_regvals { u8 reg; u8 val; }; struct ov_i2c_regvals { u8 reg; u8 val; }; /* Settings for OV2610 camera chip */ static const struct ov_i2c_regvals norm_2610[] = { { 0x12, 0x80 }, /* reset */ }; static const struct ov_i2c_regvals norm_2610ae[] = { {0x12, 0x80}, /* reset */ {0x13, 0xcd}, {0x09, 0x01}, {0x0d, 0x00}, {0x11, 0x80}, {0x12, 0x20}, /* 1600x1200 */ {0x33, 0x0c}, {0x35, 0x90}, {0x36, 0x37}, /* ms-win traces */ {0x11, 0x83}, /* clock / 3 ? */ {0x2d, 0x00}, /* 60 Hz filter */ {0x24, 0xb0}, /* normal colors */ {0x25, 0x90}, {0x10, 0x43}, }; static const struct ov_i2c_regvals norm_3620b[] = { /* * From the datasheet: "Note that after writing to register COMH * (0x12) to change the sensor mode, registers related to the * sensor's cropping window will be reset back to their default * values." * * "wait 4096 external clock ... to make sure the sensor is * stable and ready to access registers" i.e. 160us at 24MHz */ { 0x12, 0x80 }, /* COMH reset */ { 0x12, 0x00 }, /* QXGA, master */ /* * 11 CLKRC "Clock Rate Control" * [7] internal frequency doublers: on * [6] video port mode: master * [5:0] clock divider: 1 */ { 0x11, 0x80 }, /* * 13 COMI "Common Control I" * = 192 (0xC0) 11000000 * COMI[7] "AEC speed selection" * = 1 (0x01) 1....... "Faster AEC correction" * COMI[6] "AEC speed step selection" * = 1 (0x01) .1...... "Big steps, fast" * COMI[5] "Banding filter on off" * = 0 (0x00) ..0..... "Off" * COMI[4] "Banding filter option" * = 0 (0x00) ...0.... "Main clock is 48 MHz and * the PLL is ON" * COMI[3] "Reserved" * = 0 (0x00) ....0... * COMI[2] "AGC auto manual control selection" * = 0 (0x00) .....0.. "Manual" * COMI[1] "AWB auto manual control selection" * = 0 (0x00) ......0. "Manual" * COMI[0] "Exposure control" * = 0 (0x00) .......0 "Manual" */ { 0x13, 0xc0 }, /* * 09 COMC "Common Control C" * = 8 (0x08) 00001000 * COMC[7:5] "Reserved" * = 0 (0x00) 000..... * COMC[4] "Sleep Mode Enable" * = 0 (0x00) ...0.... "Normal mode" * COMC[3:2] "Sensor sampling reset timing selection" * = 2 (0x02) ....10.. "Longer reset time" * COMC[1:0] "Output drive current select" * = 0 (0x00) ......00 "Weakest" */ { 0x09, 0x08 }, /* * 0C COMD "Common Control D" * = 8 (0x08) 00001000 * COMD[7] "Reserved" * = 0 (0x00) 0....... * COMD[6] "Swap MSB and LSB at the output port" * = 0 (0x00) .0...... "False" * COMD[5:3] "Reserved" * = 1 (0x01) ..001... * COMD[2] "Output Average On Off" * = 0 (0x00) .....0.. "Output Normal" * COMD[1] "Sensor precharge voltage selection" * = 0 (0x00) ......0. 
"Selects internal * reference precharge * voltage" * COMD[0] "Snapshot option" * = 0 (0x00) .......0 "Enable live video output * after snapshot sequence" */ { 0x0c, 0x08 }, /* * 0D COME "Common Control E" * = 161 (0xA1) 10100001 * COME[7] "Output average option" * = 1 (0x01) 1....... "Output average of 4 pixels" * COME[6] "Anti-blooming control" * = 0 (0x00) .0...... "Off" * COME[5:3] "Reserved" * = 4 (0x04) ..100... * COME[2] "Clock output power down pin status" * = 0 (0x00) .....0.. "Tri-state data output pin * on power down" * COME[1] "Data output pin status selection at power down" * = 0 (0x00) ......0. "Tri-state VSYNC, PCLK, * HREF, and CHSYNC pins on * power down" * COME[0] "Auto zero circuit select" * = 1 (0x01) .......1 "On" */ { 0x0d, 0xa1 }, /* * 0E COMF "Common Control F" * = 112 (0x70) 01110000 * COMF[7] "System clock selection" * = 0 (0x00) 0....... "Use 24 MHz system clock" * COMF[6:4] "Reserved" * = 7 (0x07) .111.... * COMF[3] "Manual auto negative offset canceling selection" * = 0 (0x00) ....0... "Auto detect negative * offset and cancel it" * COMF[2:0] "Reserved" * = 0 (0x00) .....000 */ { 0x0e, 0x70 }, /* * 0F COMG "Common Control G" * = 66 (0x42) 01000010 * COMG[7] "Optical black output selection" * = 0 (0x00) 0....... "Disable" * COMG[6] "Black level calibrate selection" * = 1 (0x01) .1...... "Use optical black pixels * to calibrate" * COMG[5:4] "Reserved" * = 0 (0x00) ..00.... * COMG[3] "Channel offset adjustment" * = 0 (0x00) ....0... "Disable offset adjustment" * COMG[2] "ADC black level calibration option" * = 0 (0x00) .....0.. "Use B/G line and G/R * line to calibrate each * channel's black level" * COMG[1] "Reserved" * = 1 (0x01) ......1. * COMG[0] "ADC black level calibration enable" * = 0 (0x00) .......0 "Disable" */ { 0x0f, 0x42 }, /* * 14 COMJ "Common Control J" * = 198 (0xC6) 11000110 * COMJ[7:6] "AGC gain ceiling" * = 3 (0x03) 11...... "8x" * COMJ[5:4] "Reserved" * = 0 (0x00) ..00.... * COMJ[3] "Auto banding filter" * = 0 (0x00) ....0... "Banding filter is always * on off depending on * COMI[5] setting" * COMJ[2] "VSYNC drop option" * = 1 (0x01) .....1.. "SYNC is dropped if frame * data is dropped" * COMJ[1] "Frame data drop" * = 1 (0x01) ......1. "Drop frame data if * exposure is not within * tolerance. In AEC mode, * data is normally dropped * when data is out of * range." * COMJ[0] "Reserved" * = 0 (0x00) .......0 */ { 0x14, 0xc6 }, /* * 15 COMK "Common Control K" * = 2 (0x02) 00000010 * COMK[7] "CHSYNC pin output swap" * = 0 (0x00) 0....... "CHSYNC" * COMK[6] "HREF pin output swap" * = 0 (0x00) .0...... "HREF" * COMK[5] "PCLK output selection" * = 0 (0x00) ..0..... "PCLK always output" * COMK[4] "PCLK edge selection" * = 0 (0x00) ...0.... "Data valid on falling edge" * COMK[3] "HREF output polarity" * = 0 (0x00) ....0... "positive" * COMK[2] "Reserved" * = 0 (0x00) .....0.. * COMK[1] "VSYNC polarity" * = 1 (0x01) ......1. "negative" * COMK[0] "HSYNC polarity" * = 0 (0x00) .......0 "positive" */ { 0x15, 0x02 }, /* * 33 CHLF "Current Control" * = 9 (0x09) 00001001 * CHLF[7:6] "Sensor current control" * = 0 (0x00) 00...... * CHLF[5] "Sensor current range control" * = 0 (0x00) ..0..... "normal range" * CHLF[4] "Sensor current" * = 0 (0x00) ...0.... "normal current" * CHLF[3] "Sensor buffer current control" * = 1 (0x01) ....1... "half current" * CHLF[2] "Column buffer current control" * = 0 (0x00) .....0.. "normal current" * CHLF[1] "Analog DSP current control" * = 0 (0x00) ......0. "normal current" * CHLF[1] "ADC current control" * = 0 (0x00) ......0. 
"normal current" */ { 0x33, 0x09 }, /* * 34 VBLM "Blooming Control" * = 80 (0x50) 01010000 * VBLM[7] "Hard soft reset switch" * = 0 (0x00) 0....... "Hard reset" * VBLM[6:4] "Blooming voltage selection" * = 5 (0x05) .101.... * VBLM[3:0] "Sensor current control" * = 0 (0x00) ....0000 */ { 0x34, 0x50 }, /* * 36 VCHG "Sensor Precharge Voltage Control" * = 0 (0x00) 00000000 * VCHG[7] "Reserved" * = 0 (0x00) 0....... * VCHG[6:4] "Sensor precharge voltage control" * = 0 (0x00) .000.... * VCHG[3:0] "Sensor array common reference" * = 0 (0x00) ....0000 */ { 0x36, 0x00 }, /* * 37 ADC "ADC Reference Control" * = 4 (0x04) 00000100 * ADC[7:4] "Reserved" * = 0 (0x00) 0000.... * ADC[3] "ADC input signal range" * = 0 (0x00) ....0... "Input signal 1.0x" * ADC[2:0] "ADC range control" * = 4 (0x04) .....100 */ { 0x37, 0x04 }, /* * 38 ACOM "Analog Common Ground" * = 82 (0x52) 01010010 * ACOM[7] "Analog gain control" * = 0 (0x00) 0....... "Gain 1x" * ACOM[6] "Analog black level calibration" * = 1 (0x01) .1...... "On" * ACOM[5:0] "Reserved" * = 18 (0x12) ..010010 */ { 0x38, 0x52 }, /* * 3A FREFA "Internal Reference Adjustment" * = 0 (0x00) 00000000 * FREFA[7:0] "Range" * = 0 (0x00) 00000000 */ { 0x3a, 0x00 }, /* * 3C FVOPT "Internal Reference Adjustment" * = 31 (0x1F) 00011111 * FVOPT[7:0] "Range" * = 31 (0x1F) 00011111 */ { 0x3c, 0x1f }, /* * 44 Undocumented = 0 (0x00) 00000000 * 44[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x44, 0x00 }, /* * 40 Undocumented = 0 (0x00) 00000000 * 40[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x40, 0x00 }, /* * 41 Undocumented = 0 (0x00) 00000000 * 41[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x41, 0x00 }, /* * 42 Undocumented = 0 (0x00) 00000000 * 42[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x42, 0x00 }, /* * 43 Undocumented = 0 (0x00) 00000000 * 43[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x43, 0x00 }, /* * 45 Undocumented = 128 (0x80) 10000000 * 45[7:0] "It's a secret" * = 128 (0x80) 10000000 */ { 0x45, 0x80 }, /* * 48 Undocumented = 192 (0xC0) 11000000 * 48[7:0] "It's a secret" * = 192 (0xC0) 11000000 */ { 0x48, 0xc0 }, /* * 49 Undocumented = 25 (0x19) 00011001 * 49[7:0] "It's a secret" * = 25 (0x19) 00011001 */ { 0x49, 0x19 }, /* * 4B Undocumented = 128 (0x80) 10000000 * 4B[7:0] "It's a secret" * = 128 (0x80) 10000000 */ { 0x4b, 0x80 }, /* * 4D Undocumented = 196 (0xC4) 11000100 * 4D[7:0] "It's a secret" * = 196 (0xC4) 11000100 */ { 0x4d, 0xc4 }, /* * 35 VREF "Reference Voltage Control" * = 76 (0x4c) 01001100 * VREF[7:5] "Column high reference control" * = 2 (0x02) 010..... "higher voltage" * VREF[4:2] "Column low reference control" * = 3 (0x03) ...011.. "Highest voltage" * VREF[1:0] "Reserved" * = 0 (0x00) ......00 */ { 0x35, 0x4c }, /* * 3D Undocumented = 0 (0x00) 00000000 * 3D[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x3d, 0x00 }, /* * 3E Undocumented = 0 (0x00) 00000000 * 3E[7:0] "It's a secret" * = 0 (0x00) 00000000 */ { 0x3e, 0x00 }, /* * 3B FREFB "Internal Reference Adjustment" * = 24 (0x18) 00011000 * FREFB[7:0] "Range" * = 24 (0x18) 00011000 */ { 0x3b, 0x18 }, /* * 33 CHLF "Current Control" * = 25 (0x19) 00011001 * CHLF[7:6] "Sensor current control" * = 0 (0x00) 00...... * CHLF[5] "Sensor current range control" * = 0 (0x00) ..0..... "normal range" * CHLF[4] "Sensor current" * = 1 (0x01) ...1.... "double current" * CHLF[3] "Sensor buffer current control" * = 1 (0x01) ....1... "half current" * CHLF[2] "Column buffer current control" * = 0 (0x00) .....0.. 
"normal current" * CHLF[1] "Analog DSP current control" * = 0 (0x00) ......0. "normal current" * CHLF[1] "ADC current control" * = 0 (0x00) ......0. "normal current" */ { 0x33, 0x19 }, /* * 34 VBLM "Blooming Control" * = 90 (0x5A) 01011010 * VBLM[7] "Hard soft reset switch" * = 0 (0x00) 0....... "Hard reset" * VBLM[6:4] "Blooming voltage selection" * = 5 (0x05) .101.... * VBLM[3:0] "Sensor current control" * = 10 (0x0A) ....1010 */ { 0x34, 0x5a }, /* * 3B FREFB "Internal Reference Adjustment" * = 0 (0x00) 00000000 * FREFB[7:0] "Range" * = 0 (0x00) 00000000 */ { 0x3b, 0x00 }, /* * 33 CHLF "Current Control" * = 9 (0x09) 00001001 * CHLF[7:6] "Sensor current control" * = 0 (0x00) 00...... * CHLF[5] "Sensor current range control" * = 0 (0x00) ..0..... "normal range" * CHLF[4] "Sensor current" * = 0 (0x00) ...0.... "normal current" * CHLF[3] "Sensor buffer current control" * = 1 (0x01) ....1... "half current" * CHLF[2] "Column buffer current control" * = 0 (0x00) .....0.. "normal current" * CHLF[1] "Analog DSP current control" * = 0 (0x00) ......0. "normal current" * CHLF[1] "ADC current control" * = 0 (0x00) ......0. "normal current" */ { 0x33, 0x09 }, /* * 34 VBLM "Blooming Control" * = 80 (0x50) 01010000 * VBLM[7] "Hard soft reset switch" * = 0 (0x00) 0....... "Hard reset" * VBLM[6:4] "Blooming voltage selection" * = 5 (0x05) .101.... * VBLM[3:0] "Sensor current control" * = 0 (0x00) ....0000 */ { 0x34, 0x50 }, /* * 12 COMH "Common Control H" * = 64 (0x40) 01000000 * COMH[7] "SRST" * = 0 (0x00) 0....... "No-op" * COMH[6:4] "Resolution selection" * = 4 (0x04) .100.... "XGA" * COMH[3] "Master slave selection" * = 0 (0x00) ....0... "Master mode" * COMH[2] "Internal B/R channel option" * = 0 (0x00) .....0.. "B/R use same channel" * COMH[1] "Color bar test pattern" * = 0 (0x00) ......0. "Off" * COMH[0] "Reserved" * = 0 (0x00) .......0 */ { 0x12, 0x40 }, /* * 17 HREFST "Horizontal window start" * = 31 (0x1F) 00011111 * HREFST[7:0] "Horizontal window start, 8 MSBs" * = 31 (0x1F) 00011111 */ { 0x17, 0x1f }, /* * 18 HREFEND "Horizontal window end" * = 95 (0x5F) 01011111 * HREFEND[7:0] "Horizontal Window End, 8 MSBs" * = 95 (0x5F) 01011111 */ { 0x18, 0x5f }, /* * 19 VSTRT "Vertical window start" * = 0 (0x00) 00000000 * VSTRT[7:0] "Vertical Window Start, 8 MSBs" * = 0 (0x00) 00000000 */ { 0x19, 0x00 }, /* * 1A VEND "Vertical window end" * = 96 (0x60) 01100000 * VEND[7:0] "Vertical Window End, 8 MSBs" * = 96 (0x60) 01100000 */ { 0x1a, 0x60 }, /* * 32 COMM "Common Control M" * = 18 (0x12) 00010010 * COMM[7:6] "Pixel clock divide option" * = 0 (0x00) 00...... "/1" * COMM[5:3] "Horizontal window end position, 3 LSBs" * = 2 (0x02) ..010... * COMM[2:0] "Horizontal window start position, 3 LSBs" * = 2 (0x02) .....010 */ { 0x32, 0x12 }, /* * 03 COMA "Common Control A" * = 74 (0x4A) 01001010 * COMA[7:4] "AWB Update Threshold" * = 4 (0x04) 0100.... * COMA[3:2] "Vertical window end line control 2 LSBs" * = 2 (0x02) ....10.. * COMA[1:0] "Vertical window start line control 2 LSBs" * = 2 (0x02) ......10 */ { 0x03, 0x4a }, /* * 11 CLKRC "Clock Rate Control" * = 128 (0x80) 10000000 * CLKRC[7] "Internal frequency doublers on off seclection" * = 1 (0x01) 1....... "On" * CLKRC[6] "Digital video master slave selection" * = 0 (0x00) .0...... "Master mode, sensor * provides PCLK" * CLKRC[5:0] "Clock divider { CLK = PCLK/(1+CLKRC[5:0]) }" * = 0 (0x00) ..000000 */ { 0x11, 0x80 }, /* * 12 COMH "Common Control H" * = 0 (0x00) 00000000 * COMH[7] "SRST" * = 0 (0x00) 0....... 
"No-op" * COMH[6:4] "Resolution selection" * = 0 (0x00) .000.... "QXGA" * COMH[3] "Master slave selection" * = 0 (0x00) ....0... "Master mode" * COMH[2] "Internal B/R channel option" * = 0 (0x00) .....0.. "B/R use same channel" * COMH[1] "Color bar test pattern" * = 0 (0x00) ......0. "Off" * COMH[0] "Reserved" * = 0 (0x00) .......0 */ { 0x12, 0x00 }, /* * 12 COMH "Common Control H" * = 64 (0x40) 01000000 * COMH[7] "SRST" * = 0 (0x00) 0....... "No-op" * COMH[6:4] "Resolution selection" * = 4 (0x04) .100.... "XGA" * COMH[3] "Master slave selection" * = 0 (0x00) ....0... "Master mode" * COMH[2] "Internal B/R channel option" * = 0 (0x00) .....0.. "B/R use same channel" * COMH[1] "Color bar test pattern" * = 0 (0x00) ......0. "Off" * COMH[0] "Reserved" * = 0 (0x00) .......0 */ { 0x12, 0x40 }, /* * 17 HREFST "Horizontal window start" * = 31 (0x1F) 00011111 * HREFST[7:0] "Horizontal window start, 8 MSBs" * = 31 (0x1F) 00011111 */ { 0x17, 0x1f }, /* * 18 HREFEND "Horizontal window end" * = 95 (0x5F) 01011111 * HREFEND[7:0] "Horizontal Window End, 8 MSBs" * = 95 (0x5F) 01011111 */ { 0x18, 0x5f }, /* * 19 VSTRT "Vertical window start" * = 0 (0x00) 00000000 * VSTRT[7:0] "Vertical Window Start, 8 MSBs" * = 0 (0x00) 00000000 */ { 0x19, 0x00 }, /* * 1A VEND "Vertical window end" * = 96 (0x60) 01100000 * VEND[7:0] "Vertical Window End, 8 MSBs" * = 96 (0x60) 01100000 */ { 0x1a, 0x60 }, /* * 32 COMM "Common Control M" * = 18 (0x12) 00010010 * COMM[7:6] "Pixel clock divide option" * = 0 (0x00) 00...... "/1" * COMM[5:3] "Horizontal window end position, 3 LSBs" * = 2 (0x02) ..010... * COMM[2:0] "Horizontal window start position, 3 LSBs" * = 2 (0x02) .....010 */ { 0x32, 0x12 }, /* * 03 COMA "Common Control A" * = 74 (0x4A) 01001010 * COMA[7:4] "AWB Update Threshold" * = 4 (0x04) 0100.... * COMA[3:2] "Vertical window end line control 2 LSBs" * = 2 (0x02) ....10.. * COMA[1:0] "Vertical window start line control 2 LSBs" * = 2 (0x02) ......10 */ { 0x03, 0x4a }, /* * 02 RED "Red Gain Control" * = 175 (0xAF) 10101111 * RED[7] "Action" * = 1 (0x01) 1....... "gain = 1/(1+bitrev([6:0]))" * RED[6:0] "Value" * = 47 (0x2F) .0101111 */ { 0x02, 0xaf }, /* * 2D ADDVSL "VSYNC Pulse Width" * = 210 (0xD2) 11010010 * ADDVSL[7:0] "VSYNC pulse width, LSB" * = 210 (0xD2) 11010010 */ { 0x2d, 0xd2 }, /* * 00 GAIN = 24 (0x18) 00011000 * GAIN[7:6] "Reserved" * = 0 (0x00) 00...... * GAIN[5] "Double" * = 0 (0x00) ..0..... "False" * GAIN[4] "Double" * = 1 (0x01) ...1.... "True" * GAIN[3:0] "Range" * = 8 (0x08) ....1000 */ { 0x00, 0x18 }, /* * 01 BLUE "Blue Gain Control" * = 240 (0xF0) 11110000 * BLUE[7] "Action" * = 1 (0x01) 1....... 
"gain = 1/(1+bitrev([6:0]))" * BLUE[6:0] "Value" * = 112 (0x70) .1110000 */ { 0x01, 0xf0 }, /* * 10 AEC "Automatic Exposure Control" * = 10 (0x0A) 00001010 * AEC[7:0] "Automatic Exposure Control, 8 MSBs" * = 10 (0x0A) 00001010 */ { 0x10, 0x0a }, { 0xe1, 0x67 }, { 0xe3, 0x03 }, { 0xe4, 0x26 }, { 0xe5, 0x3e }, { 0xf8, 0x01 }, { 0xff, 0x01 }, }; static const struct ov_i2c_regvals norm_6x20[] = { { 0x12, 0x80 }, /* reset */ { 0x11, 0x01 }, { 0x03, 0x60 }, { 0x05, 0x7f }, /* For when autoadjust is off */ { 0x07, 0xa8 }, /* The ratio of 0x0c and 0x0d controls the white point */ { 0x0c, 0x24 }, { 0x0d, 0x24 }, { 0x0f, 0x15 }, /* COMS */ { 0x10, 0x75 }, /* AEC Exposure time */ { 0x12, 0x24 }, /* Enable AGC */ { 0x14, 0x04 }, /* 0x16: 0x06 helps frame stability with moving objects */ { 0x16, 0x06 }, /* { 0x20, 0x30 }, * Aperture correction enable */ { 0x26, 0xb2 }, /* BLC enable */ /* 0x28: 0x05 Selects RGB format if RGB on */ { 0x28, 0x05 }, { 0x2a, 0x04 }, /* Disable framerate adjust */ /* { 0x2b, 0xac }, * Framerate; Set 2a[7] first */ { 0x2d, 0x85 }, { 0x33, 0xa0 }, /* Color Processing Parameter */ { 0x34, 0xd2 }, /* Max A/D range */ { 0x38, 0x8b }, { 0x39, 0x40 }, { 0x3c, 0x39 }, /* Enable AEC mode changing */ { 0x3c, 0x3c }, /* Change AEC mode */ { 0x3c, 0x24 }, /* Disable AEC mode changing */ { 0x3d, 0x80 }, /* These next two registers (0x4a, 0x4b) are undocumented. * They control the color balance */ { 0x4a, 0x80 }, { 0x4b, 0x80 }, { 0x4d, 0xd2 }, /* This reduces noise a bit */ { 0x4e, 0xc1 }, { 0x4f, 0x04 }, /* Do 50-53 have any effect? */ /* Toggle 0x12[2] off and on here? */ }; static const struct ov_i2c_regvals norm_6x30[] = { { 0x12, 0x80 }, /* Reset */ { 0x00, 0x1f }, /* Gain */ { 0x01, 0x99 }, /* Blue gain */ { 0x02, 0x7c }, /* Red gain */ { 0x03, 0xc0 }, /* Saturation */ { 0x05, 0x0a }, /* Contrast */ { 0x06, 0x95 }, /* Brightness */ { 0x07, 0x2d }, /* Sharpness */ { 0x0c, 0x20 }, { 0x0d, 0x20 }, { 0x0e, 0xa0 }, /* Was 0x20, bit7 enables a 2x gain which we need */ { 0x0f, 0x05 }, { 0x10, 0x9a }, { 0x11, 0x00 }, /* Pixel clock = fastest */ { 0x12, 0x24 }, /* Enable AGC and AWB */ { 0x13, 0x21 }, { 0x14, 0x80 }, { 0x15, 0x01 }, { 0x16, 0x03 }, { 0x17, 0x38 }, { 0x18, 0xea }, { 0x19, 0x04 }, { 0x1a, 0x93 }, { 0x1b, 0x00 }, { 0x1e, 0xc4 }, { 0x1f, 0x04 }, { 0x20, 0x20 }, { 0x21, 0x10 }, { 0x22, 0x88 }, { 0x23, 0xc0 }, /* Crystal circuit power level */ { 0x25, 0x9a }, /* Increase AEC black ratio */ { 0x26, 0xb2 }, /* BLC enable */ { 0x27, 0xa2 }, { 0x28, 0x00 }, { 0x29, 0x00 }, { 0x2a, 0x84 }, /* 60 Hz power */ { 0x2b, 0xa8 }, /* 60 Hz power */ { 0x2c, 0xa0 }, { 0x2d, 0x95 }, /* Enable auto-brightness */ { 0x2e, 0x88 }, { 0x33, 0x26 }, { 0x34, 0x03 }, { 0x36, 0x8f }, { 0x37, 0x80 }, { 0x38, 0x83 }, { 0x39, 0x80 }, { 0x3a, 0x0f }, { 0x3b, 0x3c }, { 0x3c, 0x1a }, { 0x3d, 0x80 }, { 0x3e, 0x80 }, { 0x3f, 0x0e }, { 0x40, 0x00 }, /* White bal */ { 0x41, 0x00 }, /* White bal */ { 0x42, 0x80 }, { 0x43, 0x3f }, /* White bal */ { 0x44, 0x80 }, { 0x45, 0x20 }, { 0x46, 0x20 }, { 0x47, 0x80 }, { 0x48, 0x7f }, { 0x49, 0x00 }, { 0x4a, 0x00 }, { 0x4b, 0x80 }, { 0x4c, 0xd0 }, { 0x4d, 0x10 }, /* U = 0.563u, V = 0.714v */ { 0x4e, 0x40 }, { 0x4f, 0x07 }, /* UV avg., col. 
killer: max */ { 0x50, 0xff }, { 0x54, 0x23 }, /* Max AGC gain: 18dB */ { 0x55, 0xff }, { 0x56, 0x12 }, { 0x57, 0x81 }, { 0x58, 0x75 }, { 0x59, 0x01 }, /* AGC dark current comp.: +1 */ { 0x5a, 0x2c }, { 0x5b, 0x0f }, /* AWB chrominance levels */ { 0x5c, 0x10 }, { 0x3d, 0x80 }, { 0x27, 0xa6 }, { 0x12, 0x20 }, /* Toggle AWB */ { 0x12, 0x24 }, }; /* Lawrence Glaister <lg@jfm.bc.ca> reports: * * Register 0x0f in the 7610 has the following effects: * * 0x85 (AEC method 1): Best overall, good contrast range * 0x45 (AEC method 2): Very overexposed * 0xa5 (spec sheet default): Ok, but the black level is * shifted resulting in loss of contrast * 0x05 (old driver setting): very overexposed, too much * contrast */ static const struct ov_i2c_regvals norm_7610[] = { { 0x10, 0xff }, { 0x16, 0x06 }, { 0x28, 0x24 }, { 0x2b, 0xac }, { 0x12, 0x00 }, { 0x38, 0x81 }, { 0x28, 0x24 }, /* 0c */ { 0x0f, 0x85 }, /* lg's setting */ { 0x15, 0x01 }, { 0x20, 0x1c }, { 0x23, 0x2a }, { 0x24, 0x10 }, { 0x25, 0x8a }, { 0x26, 0xa2 }, { 0x27, 0xc2 }, { 0x2a, 0x04 }, { 0x2c, 0xfe }, { 0x2d, 0x93 }, { 0x30, 0x71 }, { 0x31, 0x60 }, { 0x32, 0x26 }, { 0x33, 0x20 }, { 0x34, 0x48 }, { 0x12, 0x24 }, { 0x11, 0x01 }, { 0x0c, 0x24 }, { 0x0d, 0x24 }, }; static const struct ov_i2c_regvals norm_7620[] = { { 0x12, 0x80 }, /* reset */ { 0x00, 0x00 }, /* gain */ { 0x01, 0x80 }, /* blue gain */ { 0x02, 0x80 }, /* red gain */ { 0x03, 0xc0 }, /* OV7670_R03_VREF */ { 0x06, 0x60 }, { 0x07, 0x00 }, { 0x0c, 0x24 }, { 0x0c, 0x24 }, { 0x0d, 0x24 }, { 0x11, 0x01 }, { 0x12, 0x24 }, { 0x13, 0x01 }, { 0x14, 0x84 }, { 0x15, 0x01 }, { 0x16, 0x03 }, { 0x17, 0x2f }, { 0x18, 0xcf }, { 0x19, 0x06 }, { 0x1a, 0xf5 }, { 0x1b, 0x00 }, { 0x20, 0x18 }, { 0x21, 0x80 }, { 0x22, 0x80 }, { 0x23, 0x00 }, { 0x26, 0xa2 }, { 0x27, 0xea }, { 0x28, 0x22 }, /* Was 0x20, bit1 enables a 2x gain which we need */ { 0x29, 0x00 }, { 0x2a, 0x10 }, { 0x2b, 0x00 }, { 0x2c, 0x88 }, { 0x2d, 0x91 }, { 0x2e, 0x80 }, { 0x2f, 0x44 }, { 0x60, 0x27 }, { 0x61, 0x02 }, { 0x62, 0x5f }, { 0x63, 0xd5 }, { 0x64, 0x57 }, { 0x65, 0x83 }, { 0x66, 0x55 }, { 0x67, 0x92 }, { 0x68, 0xcf }, { 0x69, 0x76 }, { 0x6a, 0x22 }, { 0x6b, 0x00 }, { 0x6c, 0x02 }, { 0x6d, 0x44 }, { 0x6e, 0x80 }, { 0x6f, 0x1d }, { 0x70, 0x8b }, { 0x71, 0x00 }, { 0x72, 0x14 }, { 0x73, 0x54 }, { 0x74, 0x00 }, { 0x75, 0x8e }, { 0x76, 0x00 }, { 0x77, 0xff }, { 0x78, 0x80 }, { 0x79, 0x80 }, { 0x7a, 0x80 }, { 0x7b, 0xe2 }, { 0x7c, 0x00 }, }; /* 7640 and 7648. The defaults should be OK for most registers. */ static const struct ov_i2c_regvals norm_7640[] = { { 0x12, 0x80 }, { 0x12, 0x14 }, }; static const struct ov_regvals init_519_ov7660[] = { { 0x5d, 0x03 }, /* Turn off suspend mode */ { 0x53, 0x9b }, /* 0x9f enables the (unused) microcontroller */ { 0x54, 0x0f }, /* bit2 (jpeg enable) */ { 0xa2, 0x20 }, /* a2-a5 are undocumented */ { 0xa3, 0x18 }, { 0xa4, 0x04 }, { 0xa5, 0x28 }, { 0x37, 0x00 }, /* SetUsbInit */ { 0x55, 0x02 }, /* 4.096 Mhz audio clock */ /* Enable both fields, YUV Input, disable defect comp (why?) 
*/ { 0x20, 0x0c }, /* 0x0d does U <-> V swap */ { 0x21, 0x38 }, { 0x22, 0x1d }, { 0x17, 0x50 }, /* undocumented */ { 0x37, 0x00 }, /* undocumented */ { 0x40, 0xff }, /* I2C timeout counter */ { 0x46, 0x00 }, /* I2C clock prescaler */ }; static const struct ov_i2c_regvals norm_7660[] = { {OV7670_R12_COM7, OV7670_COM7_RESET}, {OV7670_R11_CLKRC, 0x81}, {0x92, 0x00}, /* DM_LNL */ {0x93, 0x00}, /* DM_LNH */ {0x9d, 0x4c}, /* BD50ST */ {0x9e, 0x3f}, /* BD60ST */ {OV7670_R3B_COM11, 0x02}, {OV7670_R13_COM8, 0xf5}, {OV7670_R10_AECH, 0x00}, {OV7670_R00_GAIN, 0x00}, {OV7670_R01_BLUE, 0x7c}, {OV7670_R02_RED, 0x9d}, {OV7670_R12_COM7, 0x00}, {OV7670_R04_COM1, 00}, {OV7670_R18_HSTOP, 0x01}, {OV7670_R17_HSTART, 0x13}, {OV7670_R32_HREF, 0x92}, {OV7670_R19_VSTART, 0x02}, {OV7670_R1A_VSTOP, 0x7a}, {OV7670_R03_VREF, 0x00}, {OV7670_R0E_COM5, 0x04}, {OV7670_R0F_COM6, 0x62}, {OV7670_R15_COM10, 0x00}, {0x16, 0x02}, /* RSVD */ {0x1b, 0x00}, /* PSHFT */ {OV7670_R1E_MVFP, 0x01}, {0x29, 0x3c}, /* RSVD */ {0x33, 0x00}, /* CHLF */ {0x34, 0x07}, /* ARBLM */ {0x35, 0x84}, /* RSVD */ {0x36, 0x00}, /* RSVD */ {0x37, 0x04}, /* ADC */ {0x39, 0x43}, /* OFON */ {OV7670_R3A_TSLB, 0x00}, {OV7670_R3C_COM12, 0x6c}, {OV7670_R3D_COM13, 0x98}, {OV7670_R3F_EDGE, 0x23}, {OV7670_R40_COM15, 0xc1}, {OV7670_R41_COM16, 0x22}, {0x6b, 0x0a}, /* DBLV */ {0xa1, 0x08}, /* RSVD */ {0x69, 0x80}, /* HV */ {0x43, 0xf0}, /* RSVD.. */ {0x44, 0x10}, {0x45, 0x78}, {0x46, 0xa8}, {0x47, 0x60}, {0x48, 0x80}, {0x59, 0xba}, {0x5a, 0x9a}, {0x5b, 0x22}, {0x5c, 0xb9}, {0x5d, 0x9b}, {0x5e, 0x10}, {0x5f, 0xe0}, {0x60, 0x85}, {0x61, 0x60}, {0x9f, 0x9d}, /* RSVD */ {0xa0, 0xa0}, /* DSPC2 */ {0x4f, 0x60}, /* matrix */ {0x50, 0x64}, {0x51, 0x04}, {0x52, 0x18}, {0x53, 0x3c}, {0x54, 0x54}, {0x55, 0x40}, {0x56, 0x40}, {0x57, 0x40}, {0x58, 0x0d}, /* matrix sign */ {0x8b, 0xcc}, /* RSVD */ {0x8c, 0xcc}, {0x8d, 0xcf}, {0x6c, 0x40}, /* gamma curve */ {0x6d, 0xe0}, {0x6e, 0xa0}, {0x6f, 0x80}, {0x70, 0x70}, {0x71, 0x80}, {0x72, 0x60}, {0x73, 0x60}, {0x74, 0x50}, {0x75, 0x40}, {0x76, 0x38}, {0x77, 0x3c}, {0x78, 0x32}, {0x79, 0x1a}, {0x7a, 0x28}, {0x7b, 0x24}, {0x7c, 0x04}, /* gamma curve */ {0x7d, 0x12}, {0x7e, 0x26}, {0x7f, 0x46}, {0x80, 0x54}, {0x81, 0x64}, {0x82, 0x70}, {0x83, 0x7c}, {0x84, 0x86}, {0x85, 0x8e}, {0x86, 0x9c}, {0x87, 0xab}, {0x88, 0xc4}, {0x89, 0xd1}, {0x8a, 0xe5}, {OV7670_R14_COM9, 0x1e}, {OV7670_R24_AEW, 0x80}, {OV7670_R25_AEB, 0x72}, {OV7670_R26_VPT, 0xb3}, {0x62, 0x80}, /* LCC1 */ {0x63, 0x80}, /* LCC2 */ {0x64, 0x06}, /* LCC3 */ {0x65, 0x00}, /* LCC4 */ {0x66, 0x01}, /* LCC5 */ {0x94, 0x0e}, /* RSVD.. */ {0x95, 0x14}, {OV7670_R13_COM8, OV7670_COM8_FASTAEC | OV7670_COM8_AECSTEP | OV7670_COM8_BFILT | 0x10 | OV7670_COM8_AGC | OV7670_COM8_AWB | OV7670_COM8_AEC}, {0xa1, 0xc8} }; static const struct ov_i2c_regvals norm_9600[] = { {0x12, 0x80}, {0x0c, 0x28}, {0x11, 0x80}, {0x13, 0xb5}, {0x14, 0x3e}, {0x1b, 0x04}, {0x24, 0xb0}, {0x25, 0x90}, {0x26, 0x94}, {0x35, 0x90}, {0x37, 0x07}, {0x38, 0x08}, {0x01, 0x8e}, {0x02, 0x85} }; /* 7670. Defaults taken from OmniVision provided data, * as provided by Jonathan Corbet of OLPC */ static const struct ov_i2c_regvals norm_7670[] = { { OV7670_R12_COM7, OV7670_COM7_RESET }, { OV7670_R3A_TSLB, 0x04 }, /* OV */ { OV7670_R12_COM7, OV7670_COM7_FMT_VGA }, /* VGA */ { OV7670_R11_CLKRC, 0x01 }, /* * Set the hardware window. These values from OV don't entirely * make sense - hstop is less than hstart. But they work... 
*/ { OV7670_R17_HSTART, 0x13 }, { OV7670_R18_HSTOP, 0x01 }, { OV7670_R32_HREF, 0xb6 }, { OV7670_R19_VSTART, 0x02 }, { OV7670_R1A_VSTOP, 0x7a }, { OV7670_R03_VREF, 0x0a }, { OV7670_R0C_COM3, 0x00 }, { OV7670_R3E_COM14, 0x00 }, /* Mystery scaling numbers */ { 0x70, 0x3a }, { 0x71, 0x35 }, { 0x72, 0x11 }, { 0x73, 0xf0 }, { 0xa2, 0x02 }, /* { OV7670_R15_COM10, 0x0 }, */ /* Gamma curve values */ { 0x7a, 0x20 }, { 0x7b, 0x10 }, { 0x7c, 0x1e }, { 0x7d, 0x35 }, { 0x7e, 0x5a }, { 0x7f, 0x69 }, { 0x80, 0x76 }, { 0x81, 0x80 }, { 0x82, 0x88 }, { 0x83, 0x8f }, { 0x84, 0x96 }, { 0x85, 0xa3 }, { 0x86, 0xaf }, { 0x87, 0xc4 }, { 0x88, 0xd7 }, { 0x89, 0xe8 }, /* AGC and AEC parameters. Note we start by disabling those features, then turn them only after tweaking the values. */ { OV7670_R13_COM8, OV7670_COM8_FASTAEC | OV7670_COM8_AECSTEP | OV7670_COM8_BFILT }, { OV7670_R00_GAIN, 0x00 }, { OV7670_R10_AECH, 0x00 }, { OV7670_R0D_COM4, 0x40 }, /* magic reserved bit */ { OV7670_R14_COM9, 0x18 }, /* 4x gain + magic rsvd bit */ { OV7670_RA5_BD50MAX, 0x05 }, { OV7670_RAB_BD60MAX, 0x07 }, { OV7670_R24_AEW, 0x95 }, { OV7670_R25_AEB, 0x33 }, { OV7670_R26_VPT, 0xe3 }, { OV7670_R9F_HAECC1, 0x78 }, { OV7670_RA0_HAECC2, 0x68 }, { 0xa1, 0x03 }, /* magic */ { OV7670_RA6_HAECC3, 0xd8 }, { OV7670_RA7_HAECC4, 0xd8 }, { OV7670_RA8_HAECC5, 0xf0 }, { OV7670_RA9_HAECC6, 0x90 }, { OV7670_RAA_HAECC7, 0x94 }, { OV7670_R13_COM8, OV7670_COM8_FASTAEC | OV7670_COM8_AECSTEP | OV7670_COM8_BFILT | OV7670_COM8_AGC | OV7670_COM8_AEC }, /* Almost all of these are magic "reserved" values. */ { OV7670_R0E_COM5, 0x61 }, { OV7670_R0F_COM6, 0x4b }, { 0x16, 0x02 }, { OV7670_R1E_MVFP, 0x07 }, { 0x21, 0x02 }, { 0x22, 0x91 }, { 0x29, 0x07 }, { 0x33, 0x0b }, { 0x35, 0x0b }, { 0x37, 0x1d }, { 0x38, 0x71 }, { 0x39, 0x2a }, { OV7670_R3C_COM12, 0x78 }, { 0x4d, 0x40 }, { 0x4e, 0x20 }, { OV7670_R69_GFIX, 0x00 }, { 0x6b, 0x4a }, { 0x74, 0x10 }, { 0x8d, 0x4f }, { 0x8e, 0x00 }, { 0x8f, 0x00 }, { 0x90, 0x00 }, { 0x91, 0x00 }, { 0x96, 0x00 }, { 0x9a, 0x00 }, { 0xb0, 0x84 }, { 0xb1, 0x0c }, { 0xb2, 0x0e }, { 0xb3, 0x82 }, { 0xb8, 0x0a }, /* More reserved magic, some of which tweaks white balance */ { 0x43, 0x0a }, { 0x44, 0xf0 }, { 0x45, 0x34 }, { 0x46, 0x58 }, { 0x47, 0x28 }, { 0x48, 0x3a }, { 0x59, 0x88 }, { 0x5a, 0x88 }, { 0x5b, 0x44 }, { 0x5c, 0x67 }, { 0x5d, 0x49 }, { 0x5e, 0x0e }, { 0x6c, 0x0a }, { 0x6d, 0x55 }, { 0x6e, 0x11 }, { 0x6f, 0x9f }, /* "9e for advance AWB" */ { 0x6a, 0x40 }, { OV7670_R01_BLUE, 0x40 }, { OV7670_R02_RED, 0x60 }, { OV7670_R13_COM8, OV7670_COM8_FASTAEC | OV7670_COM8_AECSTEP | OV7670_COM8_BFILT | OV7670_COM8_AGC | OV7670_COM8_AEC | OV7670_COM8_AWB }, /* Matrix coefficients */ { 0x4f, 0x80 }, { 0x50, 0x80 }, { 0x51, 0x00 }, { 0x52, 0x22 }, { 0x53, 0x5e }, { 0x54, 0x80 }, { 0x58, 0x9e }, { OV7670_R41_COM16, OV7670_COM16_AWBGAIN }, { OV7670_R3F_EDGE, 0x00 }, { 0x75, 0x05 }, { 0x76, 0xe1 }, { 0x4c, 0x00 }, { 0x77, 0x01 }, { OV7670_R3D_COM13, OV7670_COM13_GAMMA | OV7670_COM13_UVSAT | 2}, /* was 3 */ { 0x4b, 0x09 }, { 0xc9, 0x60 }, { OV7670_R41_COM16, 0x38 }, { 0x56, 0x40 }, { 0x34, 0x11 }, { OV7670_R3B_COM11, OV7670_COM11_EXP|OV7670_COM11_HZAUTO }, { 0xa4, 0x88 }, { 0x96, 0x00 }, { 0x97, 0x30 }, { 0x98, 0x20 }, { 0x99, 0x30 }, { 0x9a, 0x84 }, { 0x9b, 0x29 }, { 0x9c, 0x03 }, { 0x9d, 0x4c }, { 0x9e, 0x3f }, { 0x78, 0x04 }, /* Extra-weird stuff. 
Some sort of multiplexor register */ { 0x79, 0x01 }, { 0xc8, 0xf0 }, { 0x79, 0x0f }, { 0xc8, 0x00 }, { 0x79, 0x10 }, { 0xc8, 0x7e }, { 0x79, 0x0a }, { 0xc8, 0x80 }, { 0x79, 0x0b }, { 0xc8, 0x01 }, { 0x79, 0x0c }, { 0xc8, 0x0f }, { 0x79, 0x0d }, { 0xc8, 0x20 }, { 0x79, 0x09 }, { 0xc8, 0x80 }, { 0x79, 0x02 }, { 0xc8, 0xc0 }, { 0x79, 0x03 }, { 0xc8, 0x40 }, { 0x79, 0x05 }, { 0xc8, 0x30 }, { 0x79, 0x26 }, }; static const struct ov_i2c_regvals norm_8610[] = { { 0x12, 0x80 }, { 0x00, 0x00 }, { 0x01, 0x80 }, { 0x02, 0x80 }, { 0x03, 0xc0 }, { 0x04, 0x30 }, { 0x05, 0x30 }, /* was 0x10, new from windrv 090403 */ { 0x06, 0x70 }, /* was 0x80, new from windrv 090403 */ { 0x0a, 0x86 }, { 0x0b, 0xb0 }, { 0x0c, 0x20 }, { 0x0d, 0x20 }, { 0x11, 0x01 }, { 0x12, 0x25 }, { 0x13, 0x01 }, { 0x14, 0x04 }, { 0x15, 0x01 }, /* Lin and Win think different about UV order */ { 0x16, 0x03 }, { 0x17, 0x38 }, /* was 0x2f, new from windrv 090403 */ { 0x18, 0xea }, /* was 0xcf, new from windrv 090403 */ { 0x19, 0x02 }, /* was 0x06, new from windrv 090403 */ { 0x1a, 0xf5 }, { 0x1b, 0x00 }, { 0x20, 0xd0 }, /* was 0x90, new from windrv 090403 */ { 0x23, 0xc0 }, /* was 0x00, new from windrv 090403 */ { 0x24, 0x30 }, /* was 0x1d, new from windrv 090403 */ { 0x25, 0x50 }, /* was 0x57, new from windrv 090403 */ { 0x26, 0xa2 }, { 0x27, 0xea }, { 0x28, 0x00 }, { 0x29, 0x00 }, { 0x2a, 0x80 }, { 0x2b, 0xc8 }, /* was 0xcc, new from windrv 090403 */ { 0x2c, 0xac }, { 0x2d, 0x45 }, /* was 0xd5, new from windrv 090403 */ { 0x2e, 0x80 }, { 0x2f, 0x14 }, /* was 0x01, new from windrv 090403 */ { 0x4c, 0x00 }, { 0x4d, 0x30 }, /* was 0x10, new from windrv 090403 */ { 0x60, 0x02 }, /* was 0x01, new from windrv 090403 */ { 0x61, 0x00 }, /* was 0x09, new from windrv 090403 */ { 0x62, 0x5f }, /* was 0xd7, new from windrv 090403 */ { 0x63, 0xff }, { 0x64, 0x53 }, /* new windrv 090403 says 0x57, * maybe that's wrong */ { 0x65, 0x00 }, { 0x66, 0x55 }, { 0x67, 0xb0 }, { 0x68, 0xc0 }, /* was 0xaf, new from windrv 090403 */ { 0x69, 0x02 }, { 0x6a, 0x22 }, { 0x6b, 0x00 }, { 0x6c, 0x99 }, /* was 0x80, old windrv says 0x00, but * deleting bit7 colors the first images red */ { 0x6d, 0x11 }, /* was 0x00, new from windrv 090403 */ { 0x6e, 0x11 }, /* was 0x00, new from windrv 090403 */ { 0x6f, 0x01 }, { 0x70, 0x8b }, { 0x71, 0x00 }, { 0x72, 0x14 }, { 0x73, 0x54 }, { 0x74, 0x00 },/* 0x60? 
 *			  - was 0x00, new from windrv 090403 */
	{ 0x75, 0x0e },
	{ 0x76, 0x02 },	/* was 0x02, new from windrv 090403 */
	{ 0x77, 0xff },
	{ 0x78, 0x80 },
	{ 0x79, 0x80 },
	{ 0x7a, 0x80 },
	{ 0x7b, 0x10 },	/* was 0x13, new from windrv 090403 */
	{ 0x7c, 0x00 },
	{ 0x7d, 0x08 },	/* was 0x09, new from windrv 090403 */
	{ 0x7e, 0x08 },	/* was 0xc0, new from windrv 090403 */
	{ 0x7f, 0xfb },
	{ 0x80, 0x28 },
	{ 0x81, 0x00 },
	{ 0x82, 0x23 },
	{ 0x83, 0x0b },
	{ 0x84, 0x00 },
	{ 0x85, 0x62 },	/* was 0x61, new from windrv 090403 */
	{ 0x86, 0xc9 },
	{ 0x87, 0x00 },
	{ 0x88, 0x00 },
	{ 0x89, 0x01 },
	{ 0x12, 0x20 },
	{ 0x12, 0x25 },	/* was 0x24, new from windrv 090403 */
};

static unsigned char ov7670_abs_to_sm(unsigned char v)
{
	if (v > 127)
		return v & 0x7f;
	return (128 - v) | 0x80;
}

/* Write an OV519 register */
static void reg_w(struct sd *sd, u16 index, u16 value)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int ret, req = 0;

	if (sd->gspca_dev.usb_err < 0)
		return;

	/* Avoid things going too fast for the bridge with an xhci host */
	udelay(150);
	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		req = 2;
		break;
	case BRIDGE_OVFX2:
		req = 0x0a;
		fallthrough;
	case BRIDGE_W9968CF:
		gspca_dbg(gspca_dev, D_USBO, "SET %02x %04x %04x\n",
			  req, value, index);
		ret = usb_control_msg(sd->gspca_dev.dev,
			usb_sndctrlpipe(sd->gspca_dev.dev, 0),
			req,
			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			value, index, NULL, 0, 500);
		goto leave;
	default:
		req = 1;
	}
	gspca_dbg(gspca_dev, D_USBO, "SET %02x 0000 %04x %02x\n",
		  req, index, value);
	sd->gspca_dev.usb_buf[0] = value;
	ret = usb_control_msg(sd->gspca_dev.dev,
			usb_sndctrlpipe(sd->gspca_dev.dev, 0),
			req,
			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			0, index,
			sd->gspca_dev.usb_buf, 1, 500);
leave:
	if (ret < 0) {
		gspca_err(gspca_dev, "reg_w %02x failed %d\n", index, ret);
		sd->gspca_dev.usb_err = ret;
		return;
	}
}

/* Read from an OV519 register, note not valid for the w9968cf!! */
/* returns: negative is error, pos or zero is data */
static int reg_r(struct sd *sd, u16 index)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int ret;
	int req;

	if (sd->gspca_dev.usb_err < 0)
		return -1;

	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		req = 3;
		break;
	case BRIDGE_OVFX2:
		req = 0x0b;
		break;
	default:
		req = 1;
	}

	/* Avoid things going too fast for the bridge with an xhci host */
	udelay(150);
	ret = usb_control_msg(sd->gspca_dev.dev,
			usb_rcvctrlpipe(sd->gspca_dev.dev, 0),
			req,
			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			0, index, sd->gspca_dev.usb_buf, 1, 500);

	if (ret >= 0) {
		ret = sd->gspca_dev.usb_buf[0];
		gspca_dbg(gspca_dev, D_USBI, "GET %02x 0000 %04x %02x\n",
			  req, index, ret);
	} else {
		gspca_err(gspca_dev, "reg_r %02x failed %d\n", index, ret);
		sd->gspca_dev.usb_err = ret;
		/*
		 * Make sure the result is zeroed to avoid uninitialized
		 * values.
		 */
		gspca_dev->usb_buf[0] = 0;
	}

	return ret;
}

/* Read 8 values from an OV519 register */
static int reg_r8(struct sd *sd, u16 index)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int ret;

	if (sd->gspca_dev.usb_err < 0)
		return -1;

	/* Avoid things going too fast for the bridge with an xhci host */
	udelay(150);
	ret = usb_control_msg(sd->gspca_dev.dev,
			usb_rcvctrlpipe(sd->gspca_dev.dev, 0),
			1,			/* REQ_IO */
			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			0, index, sd->gspca_dev.usb_buf, 8, 500);

	if (ret >= 0) {
		ret = sd->gspca_dev.usb_buf[0];
	} else {
		gspca_err(gspca_dev, "reg_r8 %02x failed %d\n", index, ret);
		sd->gspca_dev.usb_err = ret;
		/*
		 * Make sure the buffer is zeroed to avoid uninitialized
		 * values.
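		 * Callers use usb_buf directly, so stale bytes from an
		 * earlier transfer must not leak through on error.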
*/ memset(gspca_dev->usb_buf, 0, 8); } return ret; } /* * Writes bits at positions specified by mask to an OV51x reg. Bits that are in * the same position as 1's in "mask" are cleared and set to "value". Bits * that are in the same position as 0's in "mask" are preserved, regardless * of their respective state in "value". */ static void reg_w_mask(struct sd *sd, u16 index, u8 value, u8 mask) { int ret; u8 oldval; if (mask != 0xff) { value &= mask; /* Enforce mask on value */ ret = reg_r(sd, index); if (ret < 0) return; oldval = ret & ~mask; /* Clear the masked bits */ value |= oldval; /* Set the desired bits */ } reg_w(sd, index, value); } /* * Writes multiple (n) byte value to a single register. Only valid with certain * registers (0x30 and 0xc4 - 0xce). */ static void ov518_reg_w32(struct sd *sd, u16 index, u32 value, int n) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int ret; if (sd->gspca_dev.usb_err < 0) return; *((__le32 *) sd->gspca_dev.usb_buf) = __cpu_to_le32(value); /* Avoid things going to fast for the bridge with a xhci host */ udelay(150); ret = usb_control_msg(sd->gspca_dev.dev, usb_sndctrlpipe(sd->gspca_dev.dev, 0), 1 /* REG_IO */, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, sd->gspca_dev.usb_buf, n, 500); if (ret < 0) { gspca_err(gspca_dev, "reg_w32 %02x failed %d\n", index, ret); sd->gspca_dev.usb_err = ret; } } static void ov511_i2c_w(struct sd *sd, u8 reg, u8 value) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int rc, retries; gspca_dbg(gspca_dev, D_USBO, "ov511_i2c_w %02x %02x\n", reg, value); /* Three byte write cycle */ for (retries = 6; ; ) { /* Select camera register */ reg_w(sd, R51x_I2C_SADDR_3, reg); /* Write "value" to I2C data port of OV511 */ reg_w(sd, R51x_I2C_DATA, value); /* Initiate 3-byte write cycle */ reg_w(sd, R511_I2C_CTL, 0x01); do { rc = reg_r(sd, R511_I2C_CTL); } while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */ if (rc < 0) return; if ((rc & 2) == 0) /* Ack? */ break; if (--retries < 0) { gspca_dbg(gspca_dev, D_USBO, "i2c write retries exhausted\n"); return; } } } static int ov511_i2c_r(struct sd *sd, u8 reg) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int rc, value, retries; /* Two byte write cycle */ for (retries = 6; ; ) { /* Select camera register */ reg_w(sd, R51x_I2C_SADDR_2, reg); /* Initiate 2-byte write cycle */ reg_w(sd, R511_I2C_CTL, 0x03); do { rc = reg_r(sd, R511_I2C_CTL); } while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */ if (rc < 0) return rc; if ((rc & 2) == 0) /* Ack? */ break; /* I2C abort */ reg_w(sd, R511_I2C_CTL, 0x10); if (--retries < 0) { gspca_dbg(gspca_dev, D_USBI, "i2c write retries exhausted\n"); return -1; } } /* Two byte read cycle */ for (retries = 6; ; ) { /* Initiate 2-byte read cycle */ reg_w(sd, R511_I2C_CTL, 0x05); do { rc = reg_r(sd, R511_I2C_CTL); } while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */ if (rc < 0) return rc; if ((rc & 2) == 0) /* Ack? */ break; /* I2C abort */ reg_w(sd, R511_I2C_CTL, 0x10); if (--retries < 0) { gspca_dbg(gspca_dev, D_USBI, "i2c read retries exhausted\n"); return -1; } } value = reg_r(sd, R51x_I2C_DATA); gspca_dbg(gspca_dev, D_USBI, "ov511_i2c_r %02x %02x\n", reg, value); /* This is needed to make i2c_w() work */ reg_w(sd, R511_I2C_CTL, 0x05); return value; } /* * The OV518 I2C I/O procedure is different, hence, this function. * This is normally only called from i2c_w(). Note that this function * always succeeds regardless of whether the sensor is present and working. 
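 * The bridge gives no I2C ACK feedback through this path; sensor
 * presence is instead verified by reading back the sensor ID registers
 * (see init_ov_sensor).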
 */
static void ov518_i2c_w(struct sd *sd, u8 reg, u8 value)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;

	gspca_dbg(gspca_dev, D_USBO, "ov518_i2c_w %02x %02x\n", reg, value);

	/* Select camera register */
	reg_w(sd, R51x_I2C_SADDR_3, reg);

	/* Write "value" to I2C data port of OV518 */
	reg_w(sd, R51x_I2C_DATA, value);

	/* Initiate 3-byte write cycle */
	reg_w(sd, R518_I2C_CTL, 0x01);

	/* wait for write complete */
	msleep(4);
	reg_r8(sd, R518_I2C_CTL);
}

/*
 * returns: negative is error, pos or zero is data
 *
 * The OV518 I2C I/O procedure is different, hence, this function.
 * This is normally only called from i2c_r(). Note that this function
 * always succeeds regardless of whether the sensor is present and working.
 */
static int ov518_i2c_r(struct sd *sd, u8 reg)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int value;

	/* Select camera register */
	reg_w(sd, R51x_I2C_SADDR_2, reg);

	/* Initiate 2-byte write cycle */
	reg_w(sd, R518_I2C_CTL, 0x03);
	reg_r8(sd, R518_I2C_CTL);

	/* Initiate 2-byte read cycle */
	reg_w(sd, R518_I2C_CTL, 0x05);
	reg_r8(sd, R518_I2C_CTL);

	value = reg_r(sd, R51x_I2C_DATA);
	gspca_dbg(gspca_dev, D_USBI, "ov518_i2c_r %02x %02x\n", reg, value);
	return value;
}

static void ovfx2_i2c_w(struct sd *sd, u8 reg, u8 value)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int ret;

	if (sd->gspca_dev.usb_err < 0)
		return;

	ret = usb_control_msg(sd->gspca_dev.dev,
			usb_sndctrlpipe(sd->gspca_dev.dev, 0),
			0x02,
			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			(u16) value, (u16) reg, NULL, 0, 500);

	if (ret < 0) {
		gspca_err(gspca_dev, "ovfx2_i2c_w %02x failed %d\n", reg, ret);
		sd->gspca_dev.usb_err = ret;
	}

	gspca_dbg(gspca_dev, D_USBO, "ovfx2_i2c_w %02x %02x\n", reg, value);
}

static int ovfx2_i2c_r(struct sd *sd, u8 reg)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int ret;

	if (sd->gspca_dev.usb_err < 0)
		return -1;

	ret = usb_control_msg(sd->gspca_dev.dev,
			usb_rcvctrlpipe(sd->gspca_dev.dev, 0),
			0x03,
			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
			0, (u16) reg, sd->gspca_dev.usb_buf, 1, 500);

	if (ret >= 0) {
		ret = sd->gspca_dev.usb_buf[0];
		gspca_dbg(gspca_dev, D_USBI, "ovfx2_i2c_r %02x %02x\n",
			  reg, ret);
	} else {
		gspca_err(gspca_dev, "ovfx2_i2c_r %02x failed %d\n", reg, ret);
		sd->gspca_dev.usb_err = ret;
	}

	return ret;
}

static void i2c_w(struct sd *sd, u8 reg, u8 value)
{
	if (sd->sensor_reg_cache[reg] == value)
		return;

	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		ov511_i2c_w(sd, reg, value);
		break;
	case BRIDGE_OV518:
	case BRIDGE_OV518PLUS:
	case BRIDGE_OV519:
		ov518_i2c_w(sd, reg, value);
		break;
	case BRIDGE_OVFX2:
		ovfx2_i2c_w(sd, reg, value);
		break;
	case BRIDGE_W9968CF:
		w9968cf_i2c_w(sd, reg, value);
		break;
	}

	if (sd->gspca_dev.usb_err >= 0) {
		/* Upon sensor reset, empty the register cache */
		if (reg == 0x12 && (value & 0x80))
			memset(sd->sensor_reg_cache, -1,
			       sizeof(sd->sensor_reg_cache));
		else
			sd->sensor_reg_cache[reg] = value;
	}
}

static int i2c_r(struct sd *sd, u8 reg)
{
	int ret = -1;

	if (sd->sensor_reg_cache[reg] != -1)
		return sd->sensor_reg_cache[reg];

	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		ret = ov511_i2c_r(sd, reg);
		break;
	case BRIDGE_OV518:
	case BRIDGE_OV518PLUS:
	case BRIDGE_OV519:
		ret = ov518_i2c_r(sd, reg);
		break;
	case BRIDGE_OVFX2:
		ret = ovfx2_i2c_r(sd, reg);
		break;
	case BRIDGE_W9968CF:
		ret = w9968cf_i2c_r(sd, reg);
		break;
	}

	if (ret >= 0)
		sd->sensor_reg_cache[reg] = ret;

	return ret;
}

/*
 * Writes bits at positions specified by mask to an I2C reg. Bits that are in
 * the same position as 1's in "mask" are cleared and set to "value". Bits
 * that are in the same position as 0's in "mask" are preserved, regardless
 * of their respective state in "value".
 */
static void i2c_w_mask(struct sd *sd, u8 reg, u8 value, u8 mask)
{
	int rc;
	u8 oldval;

	value &= mask;			/* Enforce mask on value */
	rc = i2c_r(sd, reg);
	if (rc < 0)
		return;
	oldval = rc & ~mask;		/* Clear the masked bits */
	value |= oldval;		/* Set the desired bits */
	i2c_w(sd, reg, value);
}

/* Temporarily stops OV511 from functioning. Must do this before changing
 * registers while the camera is streaming */
static inline void ov51x_stop(struct sd *sd)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;

	gspca_dbg(gspca_dev, D_STREAM, "stopping\n");
	sd->stopped = 1;
	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		reg_w(sd, R51x_SYS_RESET, 0x3d);
		break;
	case BRIDGE_OV518:
	case BRIDGE_OV518PLUS:
		reg_w_mask(sd, R51x_SYS_RESET, 0x3a, 0x3a);
		break;
	case BRIDGE_OV519:
		reg_w(sd, OV519_R51_RESET1, 0x0f);
		reg_w(sd, OV519_R51_RESET1, 0x00);
		reg_w(sd, 0x22, 0x00);		/* FRAR */
		break;
	case BRIDGE_OVFX2:
		reg_w_mask(sd, 0x0f, 0x00, 0x02);
		break;
	case BRIDGE_W9968CF:
		reg_w(sd, 0x3c, 0x0a05);	/* stop USB transfer */
		break;
	}
}

/* Restarts OV511 after ov51x_stop() is called. Has no effect if it is not
 * actually stopped (for performance). */
static inline void ov51x_restart(struct sd *sd)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;

	gspca_dbg(gspca_dev, D_STREAM, "restarting\n");
	if (!sd->stopped)
		return;
	sd->stopped = 0;

	/* Reinitialize the stream */
	switch (sd->bridge) {
	case BRIDGE_OV511:
	case BRIDGE_OV511PLUS:
		reg_w(sd, R51x_SYS_RESET, 0x00);
		break;
	case BRIDGE_OV518:
	case BRIDGE_OV518PLUS:
		reg_w(sd, 0x2f, 0x80);
		reg_w(sd, R51x_SYS_RESET, 0x00);
		break;
	case BRIDGE_OV519:
		reg_w(sd, OV519_R51_RESET1, 0x0f);
		reg_w(sd, OV519_R51_RESET1, 0x00);
		reg_w(sd, 0x22, 0x1d);		/* FRAR */
		break;
	case BRIDGE_OVFX2:
		reg_w_mask(sd, 0x0f, 0x02, 0x02);
		break;
	case BRIDGE_W9968CF:
		reg_w(sd, 0x3c, 0x8a05);	/* USB FIFO enable */
		break;
	}
}

static void ov51x_set_slave_ids(struct sd *sd, u8 slave);

/* This does an initial reset of an OmniVision sensor and ensures that I2C
 * is synchronized. Returns <0 on failure.
 */
static int init_ov_sensor(struct sd *sd, u8 slave)
{
	int i;
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;

	ov51x_set_slave_ids(sd, slave);

	/* Reset the sensor */
	i2c_w(sd, 0x12, 0x80);

	/* Wait for it to initialize */
	msleep(150);

	for (i = 0; i < i2c_detect_tries; i++) {
		if (i2c_r(sd, OV7610_REG_ID_HIGH) == 0x7f &&
		    i2c_r(sd, OV7610_REG_ID_LOW) == 0xa2) {
			gspca_dbg(gspca_dev, D_PROBE,
				  "I2C synced in %d attempt(s)\n", i);
			return 0;
		}

		/* Reset the sensor */
		i2c_w(sd, 0x12, 0x80);

		/* Wait for it to initialize */
		msleep(150);

		/* Dummy read to sync I2C */
		if (i2c_r(sd, 0x00) < 0)
			return -1;
	}
	return -1;
}

/* Set the read and write slave IDs. The "slave" argument is the write slave,
 * and the read slave will be set to (slave + 1).
 * This should not be called from outside the i2c I/O functions.
 * Sets I2C read and write slave IDs.
Returns <0 for error */ static void ov51x_set_slave_ids(struct sd *sd, u8 slave) { switch (sd->bridge) { case BRIDGE_OVFX2: reg_w(sd, OVFX2_I2C_ADDR, slave); return; case BRIDGE_W9968CF: sd->sensor_addr = slave; return; } reg_w(sd, R51x_I2C_W_SID, slave); reg_w(sd, R51x_I2C_R_SID, slave + 1); } static void write_regvals(struct sd *sd, const struct ov_regvals *regvals, int n) { while (--n >= 0) { reg_w(sd, regvals->reg, regvals->val); regvals++; } } static void write_i2c_regvals(struct sd *sd, const struct ov_i2c_regvals *regvals, int n) { while (--n >= 0) { i2c_w(sd, regvals->reg, regvals->val); regvals++; } } /**************************************************************************** * * OV511 and sensor configuration * ***************************************************************************/ /* This initializes the OV2x10 / OV3610 / OV3620 / OV9600 */ static void ov_hires_configure(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int high, low; if (sd->bridge != BRIDGE_OVFX2) { gspca_err(gspca_dev, "error hires sensors only supported with ovfx2\n"); return; } gspca_dbg(gspca_dev, D_PROBE, "starting ov hires configuration\n"); /* Detect sensor (sub)type */ high = i2c_r(sd, 0x0a); low = i2c_r(sd, 0x0b); /* info("%x, %x", high, low); */ switch (high) { case 0x96: switch (low) { case 0x40: gspca_dbg(gspca_dev, D_PROBE, "Sensor is a OV2610\n"); sd->sensor = SEN_OV2610; return; case 0x41: gspca_dbg(gspca_dev, D_PROBE, "Sensor is a OV2610AE\n"); sd->sensor = SEN_OV2610AE; return; case 0xb1: gspca_dbg(gspca_dev, D_PROBE, "Sensor is a OV9600\n"); sd->sensor = SEN_OV9600; return; } break; case 0x36: if ((low & 0x0f) == 0x00) { gspca_dbg(gspca_dev, D_PROBE, "Sensor is a OV3610\n"); sd->sensor = SEN_OV3610; return; } break; } gspca_err(gspca_dev, "Error unknown sensor type: %02x%02x\n", high, low); } /* This initializes the OV8110, OV8610 sensor. The OV8110 uses * the same register settings as the OV8610, since they are very similar. */ static void ov8xx0_configure(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int rc; gspca_dbg(gspca_dev, D_PROBE, "starting ov8xx0 configuration\n"); /* Detect sensor (sub)type */ rc = i2c_r(sd, OV7610_REG_COM_I); if (rc < 0) { gspca_err(gspca_dev, "Error detecting sensor type\n"); return; } if ((rc & 3) == 1) sd->sensor = SEN_OV8610; else gspca_err(gspca_dev, "Unknown image sensor version: %d\n", rc & 3); } /* This initializes the OV7610, OV7620, or OV76BE sensor. The OV76BE uses * the same register settings as the OV7610, since they are very similar. */ static void ov7xx0_configure(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int rc, high, low; gspca_dbg(gspca_dev, D_PROBE, "starting OV7xx0 configuration\n"); /* Detect sensor (sub)type */ rc = i2c_r(sd, OV7610_REG_COM_I); /* add OV7670 here * it appears to be wrongly detected as a 7610 by default */ if (rc < 0) { gspca_err(gspca_dev, "Error detecting sensor type\n"); return; } if ((rc & 3) == 3) { /* quick hack to make OV7670s work */ high = i2c_r(sd, 0x0a); low = i2c_r(sd, 0x0b); /* info("%x, %x", high, low); */ if (high == 0x76 && (low & 0xf0) == 0x70) { gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV76%02x\n", low); sd->sensor = SEN_OV7670; } else { gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7610\n"); sd->sensor = SEN_OV7610; } } else if ((rc & 3) == 1) { /* I don't know what's different about the 76BE yet. 
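 * Bit 0 of sensor register 0x15 is what tells the OV7620AE and the
 * OV76BE apart below.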
*/ if (i2c_r(sd, 0x15) & 1) { gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7620AE\n"); sd->sensor = SEN_OV7620AE; } else { gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV76BE\n"); sd->sensor = SEN_OV76BE; } } else if ((rc & 3) == 0) { /* try to read product id registers */ high = i2c_r(sd, 0x0a); if (high < 0) { gspca_err(gspca_dev, "Error detecting camera chip PID\n"); return; } low = i2c_r(sd, 0x0b); if (low < 0) { gspca_err(gspca_dev, "Error detecting camera chip VER\n"); return; } if (high == 0x76) { switch (low) { case 0x30: gspca_err(gspca_dev, "Sensor is an OV7630/OV7635\n"); gspca_err(gspca_dev, "7630 is not supported by this driver\n"); return; case 0x40: gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7645\n"); sd->sensor = SEN_OV7640; /* FIXME */ break; case 0x45: gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7645B\n"); sd->sensor = SEN_OV7640; /* FIXME */ break; case 0x48: gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7648\n"); sd->sensor = SEN_OV7648; break; case 0x60: gspca_dbg(gspca_dev, D_PROBE, "Sensor is a OV7660\n"); sd->sensor = SEN_OV7660; break; default: gspca_err(gspca_dev, "Unknown sensor: 0x76%02x\n", low); return; } } else { gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV7620\n"); sd->sensor = SEN_OV7620; } } else { gspca_err(gspca_dev, "Unknown image sensor version: %d\n", rc & 3); } } /* This initializes the OV6620, OV6630, OV6630AE, or OV6630AF sensor. */ static void ov6xx0_configure(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int rc; gspca_dbg(gspca_dev, D_PROBE, "starting OV6xx0 configuration\n"); /* Detect sensor (sub)type */ rc = i2c_r(sd, OV7610_REG_COM_I); if (rc < 0) { gspca_err(gspca_dev, "Error detecting sensor type\n"); return; } /* Ugh. The first two bits are the version bits, but * the entire register value must be used. I guess OVT * underestimated how many variants they would make. */ switch (rc) { case 0x00: sd->sensor = SEN_OV6630; pr_warn("WARNING: Sensor is an OV66308. Your camera may have been misdetected in previous driver versions.\n"); break; case 0x01: sd->sensor = SEN_OV6620; gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV6620\n"); break; case 0x02: sd->sensor = SEN_OV6630; gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV66308AE\n"); break; case 0x03: sd->sensor = SEN_OV66308AF; gspca_dbg(gspca_dev, D_PROBE, "Sensor is an OV66308AF\n"); break; case 0x90: sd->sensor = SEN_OV6630; pr_warn("WARNING: Sensor is an OV66307. Your camera may have been misdetected in previous driver versions.\n"); break; default: gspca_err(gspca_dev, "FATAL: Unknown sensor version: 0x%02x\n", rc); return; } /* Set sensor-specific vars */ sd->sif = 1; } /* Turns on or off the LED. 
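 * The polarity is inverted first for cameras flagged with invert_led.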
Only has an effect with OV511+/OV518(+)/OV519 */ static void ov51x_led_control(struct sd *sd, int on) { if (sd->invert_led) on = !on; switch (sd->bridge) { /* OV511 has no LED control */ case BRIDGE_OV511PLUS: reg_w(sd, R511_SYS_LED_CTL, on); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: reg_w_mask(sd, R518_GPIO_OUT, 0x02 * on, 0x02); break; case BRIDGE_OV519: reg_w_mask(sd, OV519_GPIO_DATA_OUT0, on, 1); break; } } static void sd_reset_snapshot(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (!sd->snapshot_needs_reset) return; /* Note it is important that we clear sd->snapshot_needs_reset, before actually clearing the snapshot state in the bridge otherwise we might race with the pkt_scan interrupt handler */ sd->snapshot_needs_reset = 0; switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: reg_w(sd, R51x_SYS_SNAP, 0x02); reg_w(sd, R51x_SYS_SNAP, 0x00); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: reg_w(sd, R51x_SYS_SNAP, 0x02); /* Reset */ reg_w(sd, R51x_SYS_SNAP, 0x01); /* Enable */ break; case BRIDGE_OV519: reg_w(sd, R51x_SYS_RESET, 0x40); reg_w(sd, R51x_SYS_RESET, 0x00); break; } } static void ov51x_upload_quan_tables(struct sd *sd) { static const unsigned char yQuanTable511[] = { 0, 1, 1, 2, 2, 3, 3, 4, 1, 1, 1, 2, 2, 3, 4, 4, 1, 1, 2, 2, 3, 4, 4, 4, 2, 2, 2, 3, 4, 4, 4, 4, 2, 2, 3, 4, 4, 5, 5, 5, 3, 3, 4, 4, 5, 5, 5, 5, 3, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5 }; static const unsigned char uvQuanTable511[] = { 0, 2, 2, 3, 4, 4, 4, 4, 2, 2, 2, 4, 4, 4, 4, 4, 2, 2, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; /* OV518 quantization tables are 8x4 (instead of 8x8) */ static const unsigned char yQuanTable518[] = { 5, 4, 5, 6, 6, 7, 7, 7, 5, 5, 5, 5, 6, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 8, 7, 7, 6, 7, 7, 7, 8, 8 }; static const unsigned char uvQuanTable518[] = { 6, 6, 6, 7, 7, 7, 7, 7, 6, 6, 6, 7, 7, 7, 7, 7, 6, 6, 6, 7, 7, 7, 7, 8, 7, 7, 7, 7, 7, 7, 8, 8 }; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; const unsigned char *pYTable, *pUVTable; unsigned char val0, val1; int i, size, reg = R51x_COMP_LUT_BEGIN; gspca_dbg(gspca_dev, D_PROBE, "Uploading quantization tables\n"); if (sd->bridge == BRIDGE_OV511 || sd->bridge == BRIDGE_OV511PLUS) { pYTable = yQuanTable511; pUVTable = uvQuanTable511; size = 32; } else { pYTable = yQuanTable518; pUVTable = uvQuanTable518; size = 16; } for (i = 0; i < size; i++) { val0 = *pYTable++; val1 = *pYTable++; val0 &= 0x0f; val1 &= 0x0f; val0 |= val1 << 4; reg_w(sd, reg, val0); val0 = *pUVTable++; val1 = *pUVTable++; val0 &= 0x0f; val1 &= 0x0f; val0 |= val1 << 4; reg_w(sd, reg + size, val0); reg++; } } /* This initializes the OV511/OV511+ and the sensor */ static void ov511_configure(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* For 511 and 511+ */ static const struct ov_regvals init_511[] = { { R51x_SYS_RESET, 0x7f }, { R51x_SYS_INIT, 0x01 }, { R51x_SYS_RESET, 0x7f }, { R51x_SYS_INIT, 0x01 }, { R51x_SYS_RESET, 0x3f }, { R51x_SYS_INIT, 0x01 }, { R51x_SYS_RESET, 0x3d }, }; static const struct ov_regvals norm_511[] = { { R511_DRAM_FLOW_CTL, 0x01 }, { R51x_SYS_SNAP, 0x00 }, { R51x_SYS_SNAP, 0x02 }, { R51x_SYS_SNAP, 0x00 }, { R511_FIFO_OPTS, 0x1f }, { R511_COMP_EN, 0x00 }, { R511_COMP_LUT_EN, 0x03 }, }; static const struct ov_regvals norm_511_p[] = { { R511_DRAM_FLOW_CTL, 0xff }, { R51x_SYS_SNAP, 0x00 }, { R51x_SYS_SNAP, 0x02 }, { R51x_SYS_SNAP, 0x00 }, { R511_FIFO_OPTS, 0xff }, { 
R511_COMP_EN, 0x00 }, { R511_COMP_LUT_EN, 0x03 }, }; static const struct ov_regvals compress_511[] = { { 0x70, 0x1f }, { 0x71, 0x05 }, { 0x72, 0x06 }, { 0x73, 0x06 }, { 0x74, 0x14 }, { 0x75, 0x03 }, { 0x76, 0x04 }, { 0x77, 0x04 }, }; gspca_dbg(gspca_dev, D_PROBE, "Device custom id %x\n", reg_r(sd, R51x_SYS_CUST_ID)); write_regvals(sd, init_511, ARRAY_SIZE(init_511)); switch (sd->bridge) { case BRIDGE_OV511: write_regvals(sd, norm_511, ARRAY_SIZE(norm_511)); break; case BRIDGE_OV511PLUS: write_regvals(sd, norm_511_p, ARRAY_SIZE(norm_511_p)); break; } /* Init compression */ write_regvals(sd, compress_511, ARRAY_SIZE(compress_511)); ov51x_upload_quan_tables(sd); } /* This initializes the OV518/OV518+ and the sensor */ static void ov518_configure(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* For 518 and 518+ */ static const struct ov_regvals init_518[] = { { R51x_SYS_RESET, 0x40 }, { R51x_SYS_INIT, 0xe1 }, { R51x_SYS_RESET, 0x3e }, { R51x_SYS_INIT, 0xe1 }, { R51x_SYS_RESET, 0x00 }, { R51x_SYS_INIT, 0xe1 }, { 0x46, 0x00 }, { 0x5d, 0x03 }, }; static const struct ov_regvals norm_518[] = { { R51x_SYS_SNAP, 0x02 }, /* Reset */ { R51x_SYS_SNAP, 0x01 }, /* Enable */ { 0x31, 0x0f }, { 0x5d, 0x03 }, { 0x24, 0x9f }, { 0x25, 0x90 }, { 0x20, 0x00 }, { 0x51, 0x04 }, { 0x71, 0x19 }, { 0x2f, 0x80 }, }; static const struct ov_regvals norm_518_p[] = { { R51x_SYS_SNAP, 0x02 }, /* Reset */ { R51x_SYS_SNAP, 0x01 }, /* Enable */ { 0x31, 0x0f }, { 0x5d, 0x03 }, { 0x24, 0x9f }, { 0x25, 0x90 }, { 0x20, 0x60 }, { 0x51, 0x02 }, { 0x71, 0x19 }, { 0x40, 0xff }, { 0x41, 0x42 }, { 0x46, 0x00 }, { 0x33, 0x04 }, { 0x21, 0x19 }, { 0x3f, 0x10 }, { 0x2f, 0x80 }, }; /* First 5 bits of custom ID reg are a revision ID on OV518 */ sd->revision = reg_r(sd, R51x_SYS_CUST_ID) & 0x1f; gspca_dbg(gspca_dev, D_PROBE, "Device revision %d\n", sd->revision); write_regvals(sd, init_518, ARRAY_SIZE(init_518)); /* Set LED GPIO pin to output mode */ reg_w_mask(sd, R518_GPIO_CTL, 0x00, 0x02); switch (sd->bridge) { case BRIDGE_OV518: write_regvals(sd, norm_518, ARRAY_SIZE(norm_518)); break; case BRIDGE_OV518PLUS: write_regvals(sd, norm_518_p, ARRAY_SIZE(norm_518_p)); break; } ov51x_upload_quan_tables(sd); reg_w(sd, 0x2f, 0x80); } static void ov519_configure(struct sd *sd) { static const struct ov_regvals init_519[] = { { 0x5a, 0x6d }, /* EnableSystem */ { 0x53, 0x9b }, /* don't enable the microcontroller */ { OV519_R54_EN_CLK1, 0xff }, /* set bit2 to enable jpeg */ { 0x5d, 0x03 }, { 0x49, 0x01 }, { 0x48, 0x00 }, /* Set LED pin to output mode. Bit 4 must be cleared or sensor * detection will fail. This deserves further investigation. 
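 * The value 0xee written below does leave bit 4 cleared.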
*/ { OV519_GPIO_IO_CTRL0, 0xee }, { OV519_R51_RESET1, 0x0f }, { OV519_R51_RESET1, 0x00 }, { 0x22, 0x00 }, /* windows reads 0x55 at this point*/ }; write_regvals(sd, init_519, ARRAY_SIZE(init_519)); } static void ovfx2_configure(struct sd *sd) { static const struct ov_regvals init_fx2[] = { { 0x00, 0x60 }, { 0x02, 0x01 }, { 0x0f, 0x1d }, { 0xe9, 0x82 }, { 0xea, 0xc7 }, { 0xeb, 0x10 }, { 0xec, 0xf6 }, }; sd->stopped = 1; write_regvals(sd, init_fx2, ARRAY_SIZE(init_fx2)); } /* set the mode */ /* This function works for ov7660 only */ static void ov519_set_mode(struct sd *sd) { static const struct ov_regvals bridge_ov7660[2][10] = { {{0x10, 0x14}, {0x11, 0x1e}, {0x12, 0x00}, {0x13, 0x00}, {0x14, 0x00}, {0x15, 0x00}, {0x16, 0x00}, {0x20, 0x0c}, {0x25, 0x01}, {0x26, 0x00}}, {{0x10, 0x28}, {0x11, 0x3c}, {0x12, 0x00}, {0x13, 0x00}, {0x14, 0x00}, {0x15, 0x00}, {0x16, 0x00}, {0x20, 0x0c}, {0x25, 0x03}, {0x26, 0x00}} }; static const struct ov_i2c_regvals sensor_ov7660[2][3] = { {{0x12, 0x00}, {0x24, 0x00}, {0x0c, 0x0c}}, {{0x12, 0x00}, {0x04, 0x00}, {0x0c, 0x00}} }; static const struct ov_i2c_regvals sensor_ov7660_2[] = { {OV7670_R17_HSTART, 0x13}, {OV7670_R18_HSTOP, 0x01}, {OV7670_R32_HREF, 0x92}, {OV7670_R19_VSTART, 0x02}, {OV7670_R1A_VSTOP, 0x7a}, {OV7670_R03_VREF, 0x00}, /* {0x33, 0x00}, */ /* {0x34, 0x07}, */ /* {0x36, 0x00}, */ /* {0x6b, 0x0a}, */ }; write_regvals(sd, bridge_ov7660[sd->gspca_dev.curr_mode], ARRAY_SIZE(bridge_ov7660[0])); write_i2c_regvals(sd, sensor_ov7660[sd->gspca_dev.curr_mode], ARRAY_SIZE(sensor_ov7660[0])); write_i2c_regvals(sd, sensor_ov7660_2, ARRAY_SIZE(sensor_ov7660_2)); } /* set the frame rate */ /* This function works for sensors ov7640, ov7648 ov7660 and ov7670 only */ static void ov519_set_fr(struct sd *sd) { int fr; u8 clock; /* frame rate table with indices: * - mode = 0: 320x240, 1: 640x480 * - fr rate = 0: 30, 1: 25, 2: 20, 3: 15, 4: 10, 5: 5 * - reg = 0: bridge a4, 1: bridge 23, 2: sensor 11 (clock) */ static const u8 fr_tb[2][6][3] = { {{0x04, 0xff, 0x00}, {0x04, 0x1f, 0x00}, {0x04, 0x1b, 0x00}, {0x04, 0x15, 0x00}, {0x04, 0x09, 0x00}, {0x04, 0x01, 0x00}}, {{0x0c, 0xff, 0x00}, {0x0c, 0x1f, 0x00}, {0x0c, 0x1b, 0x00}, {0x04, 0xff, 0x01}, {0x04, 0x1f, 0x01}, {0x04, 0x1b, 0x01}}, }; if (frame_rate > 0) sd->frame_rate = frame_rate; if (sd->frame_rate >= 30) fr = 0; else if (sd->frame_rate >= 25) fr = 1; else if (sd->frame_rate >= 20) fr = 2; else if (sd->frame_rate >= 15) fr = 3; else if (sd->frame_rate >= 10) fr = 4; else fr = 5; reg_w(sd, 0xa4, fr_tb[sd->gspca_dev.curr_mode][fr][0]); reg_w(sd, 0x23, fr_tb[sd->gspca_dev.curr_mode][fr][1]); clock = fr_tb[sd->gspca_dev.curr_mode][fr][2]; if (sd->sensor == SEN_OV7660) clock |= 0x80; /* enable double clock */ ov518_i2c_w(sd, OV7670_R11_CLKRC, clock); } static void setautogain(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; i2c_w_mask(sd, 0x13, val ? 
0x05 : 0x00, 0x05); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam = &gspca_dev->cam; sd->bridge = id->driver_info & BRIDGE_MASK; sd->invert_led = (id->driver_info & BRIDGE_INVERT_LED) != 0; switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: cam->cam_mode = ov511_vga_mode; cam->nmodes = ARRAY_SIZE(ov511_vga_mode); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: cam->cam_mode = ov518_vga_mode; cam->nmodes = ARRAY_SIZE(ov518_vga_mode); break; case BRIDGE_OV519: cam->cam_mode = ov519_vga_mode; cam->nmodes = ARRAY_SIZE(ov519_vga_mode); break; case BRIDGE_OVFX2: cam->cam_mode = ov519_vga_mode; cam->nmodes = ARRAY_SIZE(ov519_vga_mode); cam->bulk_size = OVFX2_BULK_SIZE; cam->bulk_nurbs = MAX_NURBS; cam->bulk = 1; break; case BRIDGE_W9968CF: cam->cam_mode = w9968cf_vga_mode; cam->nmodes = ARRAY_SIZE(w9968cf_vga_mode); break; } sd->frame_rate = 15; return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam = &gspca_dev->cam; switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: ov511_configure(gspca_dev); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: ov518_configure(gspca_dev); break; case BRIDGE_OV519: ov519_configure(sd); break; case BRIDGE_OVFX2: ovfx2_configure(sd); break; case BRIDGE_W9968CF: w9968cf_configure(sd); break; } /* The OV519 must be more aggressive about sensor detection since * I2C write will never fail if the sensor is not present. We have * to try to initialize the sensor to detect its presence */ sd->sensor = -1; /* Test for 76xx */ if (init_ov_sensor(sd, OV7xx0_SID) >= 0) { ov7xx0_configure(sd); /* Test for 6xx0 */ } else if (init_ov_sensor(sd, OV6xx0_SID) >= 0) { ov6xx0_configure(sd); /* Test for 8xx0 */ } else if (init_ov_sensor(sd, OV8xx0_SID) >= 0) { ov8xx0_configure(sd); /* Test for 3xxx / 2xxx */ } else if (init_ov_sensor(sd, OV_HIRES_SID) >= 0) { ov_hires_configure(sd); } else { gspca_err(gspca_dev, "Can't determine sensor slave IDs\n"); goto error; } if (sd->sensor < 0) goto error; ov51x_led_control(sd, 0); /* turn LED off */ switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: if (sd->sif) { cam->cam_mode = ov511_sif_mode; cam->nmodes = ARRAY_SIZE(ov511_sif_mode); } break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: if (sd->sif) { cam->cam_mode = ov518_sif_mode; cam->nmodes = ARRAY_SIZE(ov518_sif_mode); } break; case BRIDGE_OV519: if (sd->sif) { cam->cam_mode = ov519_sif_mode; cam->nmodes = ARRAY_SIZE(ov519_sif_mode); } break; case BRIDGE_OVFX2: switch (sd->sensor) { case SEN_OV2610: case SEN_OV2610AE: cam->cam_mode = ovfx2_ov2610_mode; cam->nmodes = ARRAY_SIZE(ovfx2_ov2610_mode); break; case SEN_OV3610: cam->cam_mode = ovfx2_ov3610_mode; cam->nmodes = ARRAY_SIZE(ovfx2_ov3610_mode); break; case SEN_OV9600: cam->cam_mode = ovfx2_ov9600_mode; cam->nmodes = ARRAY_SIZE(ovfx2_ov9600_mode); break; default: if (sd->sif) { cam->cam_mode = ov519_sif_mode; cam->nmodes = ARRAY_SIZE(ov519_sif_mode); } break; } break; case BRIDGE_W9968CF: if (sd->sif) cam->nmodes = ARRAY_SIZE(w9968cf_vga_mode) - 1; /* w9968cf needs initialisation once the sensor is known */ w9968cf_init(sd); break; } /* initialize the sensor */ switch (sd->sensor) { case SEN_OV2610: write_i2c_regvals(sd, norm_2610, ARRAY_SIZE(norm_2610)); /* Enable autogain, autoexpo, awb, bandfilter */ i2c_w_mask(sd, 0x13, 0x27, 0x27); break; 
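	/* Unlike the OV2610 above, which enables AGC/AWB/AEC with mask 0x27,
	 * only the auto-exposure bits (0x05) are set for the 2610AE below. */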
case SEN_OV2610AE: write_i2c_regvals(sd, norm_2610ae, ARRAY_SIZE(norm_2610ae)); /* enable autoexpo */ i2c_w_mask(sd, 0x13, 0x05, 0x05); break; case SEN_OV3610: write_i2c_regvals(sd, norm_3620b, ARRAY_SIZE(norm_3620b)); /* Enable autogain, autoexpo, awb, bandfilter */ i2c_w_mask(sd, 0x13, 0x27, 0x27); break; case SEN_OV6620: write_i2c_regvals(sd, norm_6x20, ARRAY_SIZE(norm_6x20)); break; case SEN_OV6630: case SEN_OV66308AF: write_i2c_regvals(sd, norm_6x30, ARRAY_SIZE(norm_6x30)); break; default: /* case SEN_OV7610: */ /* case SEN_OV76BE: */ write_i2c_regvals(sd, norm_7610, ARRAY_SIZE(norm_7610)); i2c_w_mask(sd, 0x0e, 0x00, 0x40); break; case SEN_OV7620: case SEN_OV7620AE: write_i2c_regvals(sd, norm_7620, ARRAY_SIZE(norm_7620)); break; case SEN_OV7640: case SEN_OV7648: write_i2c_regvals(sd, norm_7640, ARRAY_SIZE(norm_7640)); break; case SEN_OV7660: i2c_w(sd, OV7670_R12_COM7, OV7670_COM7_RESET); msleep(14); reg_w(sd, OV519_R57_SNAPSHOT, 0x23); write_regvals(sd, init_519_ov7660, ARRAY_SIZE(init_519_ov7660)); write_i2c_regvals(sd, norm_7660, ARRAY_SIZE(norm_7660)); sd->gspca_dev.curr_mode = 1; /* 640x480 */ ov519_set_mode(sd); ov519_set_fr(sd); sd_reset_snapshot(gspca_dev); ov51x_restart(sd); ov51x_stop(sd); /* not in win traces */ ov51x_led_control(sd, 0); break; case SEN_OV7670: write_i2c_regvals(sd, norm_7670, ARRAY_SIZE(norm_7670)); break; case SEN_OV8610: write_i2c_regvals(sd, norm_8610, ARRAY_SIZE(norm_8610)); break; case SEN_OV9600: write_i2c_regvals(sd, norm_9600, ARRAY_SIZE(norm_9600)); /* enable autoexpo */ /* i2c_w_mask(sd, 0x13, 0x05, 0x05); */ break; } return gspca_dev->usb_err; error: gspca_err(gspca_dev, "OV519 Config failed\n"); return -EINVAL; } /* function called at start time before URB creation */ static int sd_isoc_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->bridge) { case BRIDGE_OVFX2: if (gspca_dev->pixfmt.width != 800) gspca_dev->cam.bulk_size = OVFX2_BULK_SIZE; else gspca_dev->cam.bulk_size = 7 * 4096; break; } return 0; } /* Set up the OV511/OV511+ with the given image parameters. * * Do not put any sensor-specific code in here (including I2C I/O functions) */ static void ov511_mode_init_regs(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int hsegs, vsegs, packet_size, fps, needed; int interlaced = 0; struct usb_host_interface *alt; struct usb_interface *intf; intf = usb_ifnum_to_if(sd->gspca_dev.dev, sd->gspca_dev.iface); alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt); if (!alt) { gspca_err(gspca_dev, "Couldn't get altsetting\n"); sd->gspca_dev.usb_err = -EIO; return; } if (alt->desc.bNumEndpoints < 1) { sd->gspca_dev.usb_err = -ENODEV; return; } packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize); reg_w(sd, R51x_FIFO_PSIZE, packet_size >> 5); reg_w(sd, R511_CAM_UV_EN, 0x01); reg_w(sd, R511_SNAP_UV_EN, 0x01); reg_w(sd, R511_SNAP_OPTS, 0x03); /* Here I'm assuming that snapshot size == image size. * I hope that's always true. 
 *						-- claudio */
	hsegs = (sd->gspca_dev.pixfmt.width >> 3) - 1;
	vsegs = (sd->gspca_dev.pixfmt.height >> 3) - 1;

	reg_w(sd, R511_CAM_PXCNT, hsegs);
	reg_w(sd, R511_CAM_LNCNT, vsegs);
	reg_w(sd, R511_CAM_PXDIV, 0x00);
	reg_w(sd, R511_CAM_LNDIV, 0x00);

	/* YUV420, low pass filter on */
	reg_w(sd, R511_CAM_OPTS, 0x03);

	/* Snapshot additions */
	reg_w(sd, R511_SNAP_PXCNT, hsegs);
	reg_w(sd, R511_SNAP_LNCNT, vsegs);
	reg_w(sd, R511_SNAP_PXDIV, 0x00);
	reg_w(sd, R511_SNAP_LNDIV, 0x00);

	/******** Set the framerate ********/
	if (frame_rate > 0)
		sd->frame_rate = frame_rate;

	switch (sd->sensor) {
	case SEN_OV6620:
		/* No framerate control, doesn't like higher rates yet */
		sd->clockdiv = 3;
		break;

	/* Note once the FIXMEs in mode_init_ov_sensor_regs() are fixed
	   for more sensors we need to do this for them too */
	case SEN_OV7620:
	case SEN_OV7620AE:
	case SEN_OV7640:
	case SEN_OV7648:
	case SEN_OV76BE:
		if (sd->gspca_dev.pixfmt.width == 320)
			interlaced = 1;
		fallthrough;
	case SEN_OV6630:
	case SEN_OV7610:
	case SEN_OV7670:
		switch (sd->frame_rate) {
		case 30:
		case 25:
			/* Not enough bandwidth to do 640x480 @ 30 fps */
			if (sd->gspca_dev.pixfmt.width != 640) {
				sd->clockdiv = 0;
				break;
			}
			/* For 640x480 case */
			fallthrough;
		default:
/*		case 20: */
/*		case 15: */
			sd->clockdiv = 1;
			break;
		case 10:
			sd->clockdiv = 2;
			break;
		case 5:
			sd->clockdiv = 5;
			break;
		}
		if (interlaced) {
			sd->clockdiv = (sd->clockdiv + 1) * 2 - 1;
			/* Higher than 10 does not work */
			if (sd->clockdiv > 10)
				sd->clockdiv = 10;
		}
		break;

	case SEN_OV8610:
		/* No framerate control ?? */
		sd->clockdiv = 0;
		break;
	}

	/* Check if we have enough bandwidth to disable compression */
	fps = (interlaced ? 60 : 30) / (sd->clockdiv + 1) + 1;
	needed = fps * sd->gspca_dev.pixfmt.width *
			sd->gspca_dev.pixfmt.height * 3 / 2;
	/* 1000 isoc packets/sec */
	if (needed > 1000 * packet_size) {
		/* Enable Y and UV quantization and compression */
		reg_w(sd, R511_COMP_EN, 0x07);
		reg_w(sd, R511_COMP_LUT_EN, 0x03);
	} else {
		reg_w(sd, R511_COMP_EN, 0x06);
		reg_w(sd, R511_COMP_LUT_EN, 0x00);
	}

	reg_w(sd, R51x_SYS_RESET, OV511_RESET_OMNICE);
	reg_w(sd, R51x_SYS_RESET, 0);
}

/* Sets up the OV518/OV518+ with the given image parameters
 *
 * OV518 needs a completely different approach, until we can figure out what
 * the individual registers do. Also, only 15 FPS is supported now.
 *
 * Do not put any sensor-specific code in here (including I2C I/O functions)
 */
static void ov518_mode_init_regs(struct sd *sd)
{
	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
	int hsegs, vsegs, packet_size;
	struct usb_host_interface *alt;
	struct usb_interface *intf;

	intf = usb_ifnum_to_if(sd->gspca_dev.dev, sd->gspca_dev.iface);
	alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt);
	if (!alt) {
		gspca_err(gspca_dev, "Couldn't get altsetting\n");
		sd->gspca_dev.usb_err = -EIO;
		return;
	}

	if (alt->desc.bNumEndpoints < 1) {
		sd->gspca_dev.usb_err = -ENODEV;
		return;
	}

	packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize);
	ov518_reg_w32(sd, R51x_FIFO_PSIZE, packet_size & ~7, 2);

	/******** Set the mode ********/
	reg_w(sd, 0x2b, 0);
	reg_w(sd, 0x2c, 0);
	reg_w(sd, 0x2d, 0);
	reg_w(sd, 0x2e, 0);
	reg_w(sd, 0x3b, 0);
	reg_w(sd, 0x3c, 0);
	reg_w(sd, 0x3d, 0);
	reg_w(sd, 0x3e, 0);

	if (sd->bridge == BRIDGE_OV518) {
		/* Set 8-bit (YVYU) input format */
		reg_w_mask(sd, 0x20, 0x08, 0x08);

		/* Set 12-bit (4:2:0) output format */
		reg_w_mask(sd, 0x28, 0x80, 0xf0);
		reg_w_mask(sd, 0x38, 0x80, 0xf0);
	} else {
		reg_w(sd, 0x28, 0x80);
		reg_w(sd, 0x38, 0x80);
	}

	hsegs = sd->gspca_dev.pixfmt.width / 16;
	vsegs = sd->gspca_dev.pixfmt.height / 4;

	reg_w(sd, 0x29, hsegs);
	reg_w(sd, 0x2a, vsegs);

	reg_w(sd, 0x39, hsegs);
	reg_w(sd, 0x3a, vsegs);

	/* Windows driver does this here; who knows why */
	reg_w(sd, 0x2f, 0x80);

	/******** Set the framerate ********/
	if (sd->bridge == BRIDGE_OV518PLUS && sd->revision == 0 &&
	    sd->sensor == SEN_OV7620AE)
		sd->clockdiv = 0;
	else
		sd->clockdiv = 1;

	/* Mode independent, but framerate dependent, regs */
	/* 0x51: Clock divider; Only works on some cams which use 2 crystals */
	reg_w(sd, 0x51, 0x04);
	reg_w(sd, 0x22, 0x18);
	reg_w(sd, 0x23, 0xff);

	if (sd->bridge == BRIDGE_OV518PLUS) {
		switch (sd->sensor) {
		case SEN_OV7620AE:
			/*
			 * HdG: 640x480 needs special handling on device
			 * revision 2, we check for device revision > 0 to
			 * avoid regressions, as we don't know the correct
			 * thing to do for revision 1.
			 *
			 * Also this likely means we don't need to
			 * differentiate between the OV7620 and OV7620AE,
			 * earlier testing hitting this same problem likely
			 * happened to be with revision < 2 cams using an
			 * OV7620 and revision 2 cams using an OV7620AE.
			 */
			if (sd->revision > 0 &&
			    sd->gspca_dev.pixfmt.width == 640) {
				reg_w(sd, 0x20, 0x60);
				reg_w(sd, 0x21, 0x1f);
			} else {
				reg_w(sd, 0x20, 0x00);
				reg_w(sd, 0x21, 0x19);
			}
			break;
		case SEN_OV7620:
			reg_w(sd, 0x20, 0x00);
			reg_w(sd, 0x21, 0x19);
			break;
		default:
			reg_w(sd, 0x21, 0x19);
		}
	} else
		reg_w(sd, 0x71, 0x17);	/* Compression-related? */

	/* FIXME: Sensor-specific */
	/* Bit 5 is what matters here.
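	 * (0x23 = 0b00100011, so bit 5 is indeed set)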
Of course, it is "reserved" */ i2c_w(sd, 0x54, 0x23); reg_w(sd, 0x2f, 0x80); if (sd->bridge == BRIDGE_OV518PLUS) { reg_w(sd, 0x24, 0x94); reg_w(sd, 0x25, 0x90); ov518_reg_w32(sd, 0xc4, 400, 2); /* 190h */ ov518_reg_w32(sd, 0xc6, 540, 2); /* 21ch */ ov518_reg_w32(sd, 0xc7, 540, 2); /* 21ch */ ov518_reg_w32(sd, 0xc8, 108, 2); /* 6ch */ ov518_reg_w32(sd, 0xca, 131098, 3); /* 2001ah */ ov518_reg_w32(sd, 0xcb, 532, 2); /* 214h */ ov518_reg_w32(sd, 0xcc, 2400, 2); /* 960h */ ov518_reg_w32(sd, 0xcd, 32, 2); /* 20h */ ov518_reg_w32(sd, 0xce, 608, 2); /* 260h */ } else { reg_w(sd, 0x24, 0x9f); reg_w(sd, 0x25, 0x90); ov518_reg_w32(sd, 0xc4, 400, 2); /* 190h */ ov518_reg_w32(sd, 0xc6, 381, 2); /* 17dh */ ov518_reg_w32(sd, 0xc7, 381, 2); /* 17dh */ ov518_reg_w32(sd, 0xc8, 128, 2); /* 80h */ ov518_reg_w32(sd, 0xca, 183331, 3); /* 2cc23h */ ov518_reg_w32(sd, 0xcb, 746, 2); /* 2eah */ ov518_reg_w32(sd, 0xcc, 1750, 2); /* 6d6h */ ov518_reg_w32(sd, 0xcd, 45, 2); /* 2dh */ ov518_reg_w32(sd, 0xce, 851, 2); /* 353h */ } reg_w(sd, 0x2f, 0x80); } /* Sets up the OV519 with the given image parameters * * OV519 needs a completely different approach, until we can figure out what * the individual registers do. * * Do not put any sensor-specific code in here (including I2C I/O functions) */ static void ov519_mode_init_regs(struct sd *sd) { static const struct ov_regvals mode_init_519_ov7670[] = { { 0x5d, 0x03 }, /* Turn off suspend mode */ { 0x53, 0x9f }, /* was 9b in 1.65-1.08 */ { OV519_R54_EN_CLK1, 0x0f }, /* bit2 (jpeg enable) */ { 0xa2, 0x20 }, /* a2-a5 are undocumented */ { 0xa3, 0x18 }, { 0xa4, 0x04 }, { 0xa5, 0x28 }, { 0x37, 0x00 }, /* SetUsbInit */ { 0x55, 0x02 }, /* 4.096 Mhz audio clock */ /* Enable both fields, YUV Input, disable defect comp (why?) */ { 0x20, 0x0c }, { 0x21, 0x38 }, { 0x22, 0x1d }, { 0x17, 0x50 }, /* undocumented */ { 0x37, 0x00 }, /* undocumented */ { 0x40, 0xff }, /* I2C timeout counter */ { 0x46, 0x00 }, /* I2C clock prescaler */ { 0x59, 0x04 }, /* new from windrv 090403 */ { 0xff, 0x00 }, /* undocumented */ /* windows reads 0x55 at this point, why? */ }; static const struct ov_regvals mode_init_519[] = { { 0x5d, 0x03 }, /* Turn off suspend mode */ { 0x53, 0x9f }, /* was 9b in 1.65-1.08 */ { OV519_R54_EN_CLK1, 0x0f }, /* bit2 (jpeg enable) */ { 0xa2, 0x20 }, /* a2-a5 are undocumented */ { 0xa3, 0x18 }, { 0xa4, 0x04 }, { 0xa5, 0x28 }, { 0x37, 0x00 }, /* SetUsbInit */ { 0x55, 0x02 }, /* 4.096 Mhz audio clock */ /* Enable both fields, YUV Input, disable defect comp (why?) */ { 0x22, 0x1d }, { 0x17, 0x50 }, /* undocumented */ { 0x37, 0x00 }, /* undocumented */ { 0x40, 0xff }, /* I2C timeout counter */ { 0x46, 0x00 }, /* I2C clock prescaler */ { 0x59, 0x04 }, /* new from windrv 090403 */ { 0xff, 0x00 }, /* undocumented */ /* windows reads 0x55 at this point, why? 
*/ }; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; /******** Set the mode ********/ switch (sd->sensor) { default: write_regvals(sd, mode_init_519, ARRAY_SIZE(mode_init_519)); if (sd->sensor == SEN_OV7640 || sd->sensor == SEN_OV7648) { /* Select 8-bit input mode */ reg_w_mask(sd, OV519_R20_DFR, 0x10, 0x10); } break; case SEN_OV7660: return; /* done by ov519_set_mode/fr() */ case SEN_OV7670: write_regvals(sd, mode_init_519_ov7670, ARRAY_SIZE(mode_init_519_ov7670)); break; } reg_w(sd, OV519_R10_H_SIZE, sd->gspca_dev.pixfmt.width >> 4); reg_w(sd, OV519_R11_V_SIZE, sd->gspca_dev.pixfmt.height >> 3); if (sd->sensor == SEN_OV7670 && sd->gspca_dev.cam.cam_mode[sd->gspca_dev.curr_mode].priv) reg_w(sd, OV519_R12_X_OFFSETL, 0x04); else if (sd->sensor == SEN_OV7648 && sd->gspca_dev.cam.cam_mode[sd->gspca_dev.curr_mode].priv) reg_w(sd, OV519_R12_X_OFFSETL, 0x01); else reg_w(sd, OV519_R12_X_OFFSETL, 0x00); reg_w(sd, OV519_R13_X_OFFSETH, 0x00); reg_w(sd, OV519_R14_Y_OFFSETL, 0x00); reg_w(sd, OV519_R15_Y_OFFSETH, 0x00); reg_w(sd, OV519_R16_DIVIDER, 0x00); reg_w(sd, OV519_R25_FORMAT, 0x03); /* YUV422 */ reg_w(sd, 0x26, 0x00); /* Undocumented */ /******** Set the framerate ********/ if (frame_rate > 0) sd->frame_rate = frame_rate; /* FIXME: These are only valid at the max resolution. */ sd->clockdiv = 0; switch (sd->sensor) { case SEN_OV7640: case SEN_OV7648: switch (sd->frame_rate) { default: /* case 30: */ reg_w(sd, 0xa4, 0x0c); reg_w(sd, 0x23, 0xff); break; case 25: reg_w(sd, 0xa4, 0x0c); reg_w(sd, 0x23, 0x1f); break; case 20: reg_w(sd, 0xa4, 0x0c); reg_w(sd, 0x23, 0x1b); break; case 15: reg_w(sd, 0xa4, 0x04); reg_w(sd, 0x23, 0xff); sd->clockdiv = 1; break; case 10: reg_w(sd, 0xa4, 0x04); reg_w(sd, 0x23, 0x1f); sd->clockdiv = 1; break; case 5: reg_w(sd, 0xa4, 0x04); reg_w(sd, 0x23, 0x1b); sd->clockdiv = 1; break; } break; case SEN_OV8610: switch (sd->frame_rate) { default: /* 15 fps */ /* case 15: */ reg_w(sd, 0xa4, 0x06); reg_w(sd, 0x23, 0xff); break; case 10: reg_w(sd, 0xa4, 0x06); reg_w(sd, 0x23, 0x1f); break; case 5: reg_w(sd, 0xa4, 0x06); reg_w(sd, 0x23, 0x1b); break; } break; case SEN_OV7670: /* guesses, based on 7640 */ gspca_dbg(gspca_dev, D_STREAM, "Setting framerate to %d fps\n", (sd->frame_rate == 0) ? 15 : sd->frame_rate); reg_w(sd, 0xa4, 0x10); switch (sd->frame_rate) { case 30: reg_w(sd, 0x23, 0xff); break; case 20: reg_w(sd, 0x23, 0x1b); break; default: /* case 15: */ reg_w(sd, 0x23, 0xff); sd->clockdiv = 1; break; } break; } } static void mode_init_ov_sensor_regs(struct sd *sd) { struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int qvga, xstart, xend, ystart, yend; u8 v; qvga = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv & 1; /******** Mode (VGA/QVGA) and sensor specific regs ********/ switch (sd->sensor) { case SEN_OV2610: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x28, qvga ? 0x00 : 0x20, 0x20); i2c_w(sd, 0x24, qvga ? 0x20 : 0x3a); i2c_w(sd, 0x25, qvga ? 0x30 : 0x60); i2c_w_mask(sd, 0x2d, qvga ? 0x40 : 0x00, 0x40); i2c_w_mask(sd, 0x67, qvga ? 0xf0 : 0x90, 0xf0); i2c_w_mask(sd, 0x74, qvga ? 0x20 : 0x00, 0x20); return; case SEN_OV2610AE: { u8 v; /* frame rates: * 10fps / 5 fps for 1600x1200 * 40fps / 20fps for 800x600 */ v = 80; if (qvga) { if (sd->frame_rate < 25) v = 0x81; } else { if (sd->frame_rate < 10) v = 0x81; } i2c_w(sd, 0x11, v); i2c_w(sd, 0x12, qvga ? 
0x60 : 0x20); return; } case SEN_OV3610: if (qvga) { xstart = (1040 - gspca_dev->pixfmt.width) / 2 + (0x1f << 4); ystart = (776 - gspca_dev->pixfmt.height) / 2; } else { xstart = (2076 - gspca_dev->pixfmt.width) / 2 + (0x10 << 4); ystart = (1544 - gspca_dev->pixfmt.height) / 2; } xend = xstart + gspca_dev->pixfmt.width; yend = ystart + gspca_dev->pixfmt.height; /* Writing to the COMH register resets the other windowing regs to their default values, so we must do this first. */ i2c_w_mask(sd, 0x12, qvga ? 0x40 : 0x00, 0xf0); i2c_w_mask(sd, 0x32, (((xend >> 1) & 7) << 3) | ((xstart >> 1) & 7), 0x3f); i2c_w_mask(sd, 0x03, (((yend >> 1) & 3) << 2) | ((ystart >> 1) & 3), 0x0f); i2c_w(sd, 0x17, xstart >> 4); i2c_w(sd, 0x18, xend >> 4); i2c_w(sd, 0x19, ystart >> 3); i2c_w(sd, 0x1a, yend >> 3); return; case SEN_OV8610: /* For OV8610 qvga means qsvga */ i2c_w_mask(sd, OV7610_REG_COM_C, qvga ? (1 << 5) : 0, 1 << 5); i2c_w_mask(sd, 0x13, 0x00, 0x20); /* Select 16 bit data bus */ i2c_w_mask(sd, 0x12, 0x04, 0x06); /* AWB: 1 Test pattern: 0 */ i2c_w_mask(sd, 0x2d, 0x00, 0x40); /* from windrv 090403 */ i2c_w_mask(sd, 0x28, 0x20, 0x20); /* progressive mode on */ break; case SEN_OV7610: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w(sd, 0x35, qvga ? 0x1e : 0x9e); i2c_w_mask(sd, 0x13, 0x00, 0x20); /* Select 16 bit data bus */ i2c_w_mask(sd, 0x12, 0x04, 0x06); /* AWB: 1 Test pattern: 0 */ break; case SEN_OV7620: case SEN_OV7620AE: case SEN_OV76BE: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x28, qvga ? 0x00 : 0x20, 0x20); i2c_w(sd, 0x24, qvga ? 0x20 : 0x3a); i2c_w(sd, 0x25, qvga ? 0x30 : 0x60); i2c_w_mask(sd, 0x2d, qvga ? 0x40 : 0x00, 0x40); i2c_w_mask(sd, 0x67, qvga ? 0xb0 : 0x90, 0xf0); i2c_w_mask(sd, 0x74, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x13, 0x00, 0x20); /* Select 16 bit data bus */ i2c_w_mask(sd, 0x12, 0x04, 0x06); /* AWB: 1 Test pattern: 0 */ if (sd->sensor == SEN_OV76BE) i2c_w(sd, 0x35, qvga ? 0x1e : 0x9e); break; case SEN_OV7640: case SEN_OV7648: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x28, qvga ? 0x00 : 0x20, 0x20); /* Setting this undocumented bit in qvga mode removes a very annoying vertical shaking of the image */ i2c_w_mask(sd, 0x2d, qvga ? 0x40 : 0x00, 0x40); /* Unknown */ i2c_w_mask(sd, 0x67, qvga ? 0xf0 : 0x90, 0xf0); /* Allow higher automatic gain (to allow higher framerates) */ i2c_w_mask(sd, 0x74, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x12, 0x04, 0x04); /* AWB: 1 */ break; case SEN_OV7670: /* set COM7_FMT_VGA or COM7_FMT_QVGA * do we need to set anything else? * HSTART etc are set in set_ov_sensor_window itself */ i2c_w_mask(sd, OV7670_R12_COM7, qvga ? OV7670_COM7_FMT_QVGA : OV7670_COM7_FMT_VGA, OV7670_COM7_FMT_MASK); i2c_w_mask(sd, 0x13, 0x00, 0x20); /* Select 16 bit data bus */ i2c_w_mask(sd, OV7670_R13_COM8, OV7670_COM8_AWB, OV7670_COM8_AWB); if (qvga) { /* QVGA from ov7670.c by * Jonathan Corbet */ xstart = 164; xend = 28; ystart = 14; yend = 494; } else { /* VGA */ xstart = 158; xend = 14; ystart = 10; yend = 490; } /* OV7670 hardware window registers are split across * multiple locations */ i2c_w(sd, OV7670_R17_HSTART, xstart >> 3); i2c_w(sd, OV7670_R18_HSTOP, xend >> 3); v = i2c_r(sd, OV7670_R32_HREF); v = (v & 0xc0) | ((xend & 0x7) << 3) | (xstart & 0x07); msleep(10); /* need to sleep between read and write to * same reg! 
*/ i2c_w(sd, OV7670_R32_HREF, v); i2c_w(sd, OV7670_R19_VSTART, ystart >> 2); i2c_w(sd, OV7670_R1A_VSTOP, yend >> 2); v = i2c_r(sd, OV7670_R03_VREF); v = (v & 0xc0) | ((yend & 0x3) << 2) | (ystart & 0x03); msleep(10); /* need to sleep between read and write to * same reg! */ i2c_w(sd, OV7670_R03_VREF, v); break; case SEN_OV6620: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x13, 0x00, 0x20); /* Select 16 bit data bus */ i2c_w_mask(sd, 0x12, 0x04, 0x06); /* AWB: 1 Test pattern: 0 */ break; case SEN_OV6630: case SEN_OV66308AF: i2c_w_mask(sd, 0x14, qvga ? 0x20 : 0x00, 0x20); i2c_w_mask(sd, 0x12, 0x04, 0x06); /* AWB: 1 Test pattern: 0 */ break; case SEN_OV9600: { const struct ov_i2c_regvals *vals; static const struct ov_i2c_regvals sxga_15[] = { {0x11, 0x80}, {0x14, 0x3e}, {0x24, 0x85}, {0x25, 0x75} }; static const struct ov_i2c_regvals sxga_7_5[] = { {0x11, 0x81}, {0x14, 0x3e}, {0x24, 0x85}, {0x25, 0x75} }; static const struct ov_i2c_regvals vga_30[] = { {0x11, 0x81}, {0x14, 0x7e}, {0x24, 0x70}, {0x25, 0x60} }; static const struct ov_i2c_regvals vga_15[] = { {0x11, 0x83}, {0x14, 0x3e}, {0x24, 0x80}, {0x25, 0x70} }; /* frame rates: * 15fps / 7.5 fps for 1280x1024 * 30fps / 15fps for 640x480 */ i2c_w_mask(sd, 0x12, qvga ? 0x40 : 0x00, 0x40); if (qvga) vals = sd->frame_rate < 30 ? vga_15 : vga_30; else vals = sd->frame_rate < 15 ? sxga_7_5 : sxga_15; write_i2c_regvals(sd, vals, ARRAY_SIZE(sxga_15)); return; } default: return; } /******** Clock programming ********/ i2c_w(sd, 0x11, sd->clockdiv); } /* this function works for bridge ov519 and sensors ov7660 and ov7670 only */ static void sethvflip(struct gspca_dev *gspca_dev, s32 hflip, s32 vflip) { struct sd *sd = (struct sd *) gspca_dev; if (sd->gspca_dev.streaming) reg_w(sd, OV519_R51_RESET1, 0x0f); /* block stream */ i2c_w_mask(sd, OV7670_R1E_MVFP, OV7670_MVFP_MIRROR * hflip | OV7670_MVFP_VFLIP * vflip, OV7670_MVFP_MIRROR | OV7670_MVFP_VFLIP); if (sd->gspca_dev.streaming) reg_w(sd, OV519_R51_RESET1, 0x00); /* restart stream */ } static void set_ov_sensor_window(struct sd *sd) { struct gspca_dev *gspca_dev; int qvga, crop; int hwsbase, hwebase, vwsbase, vwebase, hwscale, vwscale; /* mode setup is fully handled in mode_init_ov_sensor_regs for these */ switch (sd->sensor) { case SEN_OV2610: case SEN_OV2610AE: case SEN_OV3610: case SEN_OV7670: case SEN_OV9600: mode_init_ov_sensor_regs(sd); return; case SEN_OV7660: ov519_set_mode(sd); ov519_set_fr(sd); return; } gspca_dev = &sd->gspca_dev; qvga = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv & 1; crop = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv & 2; /* The different sensor ICs handle setting up of window differently. * IF YOU SET IT WRONG, YOU WILL GET ALL ZERO ISOC DATA FROM OV51x!! 
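 * The window is programmed from per-sensor base values plus the scaled
 * sensor size; see the i2c_w() writes to regs 0x17-0x1a at the end.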
*/ switch (sd->sensor) { case SEN_OV8610: hwsbase = 0x1e; hwebase = 0x1e; vwsbase = 0x02; vwebase = 0x02; break; case SEN_OV7610: case SEN_OV76BE: hwsbase = 0x38; hwebase = 0x3a; vwsbase = vwebase = 0x05; break; case SEN_OV6620: case SEN_OV6630: case SEN_OV66308AF: hwsbase = 0x38; hwebase = 0x3a; vwsbase = 0x05; vwebase = 0x06; if (sd->sensor == SEN_OV66308AF && qvga) /* HDG: this fixes U and V getting swapped */ hwsbase++; if (crop) { hwsbase += 8; hwebase += 8; vwsbase += 11; vwebase += 11; } break; case SEN_OV7620: case SEN_OV7620AE: hwsbase = 0x2f; /* From 7620.SET (spec is wrong) */ hwebase = 0x2f; vwsbase = vwebase = 0x05; break; case SEN_OV7640: case SEN_OV7648: hwsbase = 0x1a; hwebase = 0x1a; vwsbase = vwebase = 0x03; break; default: return; } switch (sd->sensor) { case SEN_OV6620: case SEN_OV6630: case SEN_OV66308AF: if (qvga) { /* QCIF */ hwscale = 0; vwscale = 0; } else { /* CIF */ hwscale = 1; vwscale = 1; /* The datasheet says 0; * it's wrong */ } break; case SEN_OV8610: if (qvga) { /* QSVGA */ hwscale = 1; vwscale = 1; } else { /* SVGA */ hwscale = 2; vwscale = 2; } break; default: /* SEN_OV7xx0 */ if (qvga) { /* QVGA */ hwscale = 1; vwscale = 0; } else { /* VGA */ hwscale = 2; vwscale = 1; } } mode_init_ov_sensor_regs(sd); i2c_w(sd, 0x17, hwsbase); i2c_w(sd, 0x18, hwebase + (sd->sensor_width >> hwscale)); i2c_w(sd, 0x19, vwsbase); i2c_w(sd, 0x1a, vwebase + (sd->sensor_height >> vwscale)); } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* Default for most bridges, allow bridge_mode_init_regs to override */ sd->sensor_width = sd->gspca_dev.pixfmt.width; sd->sensor_height = sd->gspca_dev.pixfmt.height; switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: ov511_mode_init_regs(sd); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: ov518_mode_init_regs(sd); break; case BRIDGE_OV519: ov519_mode_init_regs(sd); break; /* case BRIDGE_OVFX2: nothing to do */ case BRIDGE_W9968CF: w9968cf_mode_init_regs(sd); break; } set_ov_sensor_window(sd); /* Force clear snapshot state in case the snapshot button was pressed while we weren't streaming */ sd->snapshot_needs_reset = 1; sd_reset_snapshot(gspca_dev); sd->first_frame = 3; ov51x_restart(sd); ov51x_led_control(sd, 1); return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; ov51x_stop(sd); ov51x_led_control(sd, 0); } static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (!sd->gspca_dev.present) return; if (sd->bridge == BRIDGE_W9968CF) w9968cf_stop0(sd); #if IS_ENABLED(CONFIG_INPUT) /* If the last button state is pressed, release it now! 
*/ if (sd->snapshot_pressed) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); sd->snapshot_pressed = 0; } #endif if (sd->bridge == BRIDGE_OV519) reg_w(sd, OV519_R57_SNAPSHOT, 0x23); } static void ov51x_handle_button(struct gspca_dev *gspca_dev, u8 state) { struct sd *sd = (struct sd *) gspca_dev; if (sd->snapshot_pressed != state) { #if IS_ENABLED(CONFIG_INPUT) input_report_key(gspca_dev->input_dev, KEY_CAMERA, state); input_sync(gspca_dev->input_dev); #endif if (state) sd->snapshot_needs_reset = 1; sd->snapshot_pressed = state; } else { /* On the ov511 / ov519 we need to reset the button state multiple times, as resetting does not work as long as the button stays pressed */ switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: case BRIDGE_OV519: if (state) sd->snapshot_needs_reset = 1; break; } } } static void ov511_pkt_scan(struct gspca_dev *gspca_dev, u8 *in, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; /* SOF/EOF packets have 1st to 8th bytes zeroed and the 9th * byte non-zero. The EOF packet has image width/height in the * 10th and 11th bytes. The 9th byte is given as follows: * * bit 7: EOF * 6: compression enabled * 5: 422/420/400 modes * 4: 422/420/400 modes * 3: 1 * 2: snapshot button on * 1: snapshot frame * 0: even/odd field */ if (!(in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6] | in[7]) && (in[8] & 0x08)) { ov51x_handle_button(gspca_dev, (in[8] >> 2) & 1); if (in[8] & 0x80) { /* Frame end */ if ((in[9] + 1) * 8 != gspca_dev->pixfmt.width || (in[10] + 1) * 8 != gspca_dev->pixfmt.height) { gspca_err(gspca_dev, "Invalid frame size, got: %dx%d, requested: %dx%d\n", (in[9] + 1) * 8, (in[10] + 1) * 8, gspca_dev->pixfmt.width, gspca_dev->pixfmt.height); gspca_dev->last_packet_type = DISCARD_PACKET; return; } /* Add 11 byte footer to frame, might be useful */ gspca_frame_add(gspca_dev, LAST_PACKET, in, 11); return; } else { /* Frame start */ gspca_frame_add(gspca_dev, FIRST_PACKET, in, 0); sd->packet_nr = 0; } } /* Ignore the packet number */ len--; /* intermediate packet */ gspca_frame_add(gspca_dev, INTER_PACKET, in, len); } static void ov518_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; /* A false positive here is likely, until OVT gives me * the definitive SOF/EOF format */ if ((!(data[0] | data[1] | data[2] | data[3] | data[5])) && data[6]) { ov51x_handle_button(gspca_dev, (data[6] >> 1) & 1); gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, FIRST_PACKET, NULL, 0); sd->packet_nr = 0; } if (gspca_dev->last_packet_type == DISCARD_PACKET) return; /* Does this device use packet numbers ? 
*/ if (len & 7) { len--; if (sd->packet_nr == data[len]) sd->packet_nr++; /* The last few packets of the frame (which are all 0's except that they may contain part of the footer), are numbered 0 */ else if (sd->packet_nr == 0 || data[len]) { gspca_err(gspca_dev, "Invalid packet nr: %d (expect: %d)\n", (int)data[len], (int)sd->packet_nr); gspca_dev->last_packet_type = DISCARD_PACKET; return; } } /* intermediate packet */ gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void ov519_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { /* Header of ov519 is 16 bytes: * Byte Value Description * 0 0xff magic * 1 0xff magic * 2 0xff magic * 3 0xXX 0x50 = SOF, 0x51 = EOF * 9 0xXX 0x01 initial frame without data, * 0x00 standard frame with image * 14 Lo in EOF: length of image data / 8 * 15 Hi */ if (data[0] == 0xff && data[1] == 0xff && data[2] == 0xff) { switch (data[3]) { case 0x50: /* start of frame */ /* Don't check the button state here, as the state usually (always ?) changes at EOF and checking it here leads to unnecessary snapshot state resets. */ #define HDRSZ 16 data += HDRSZ; len -= HDRSZ; #undef HDRSZ if (data[0] == 0xff || data[1] == 0xd8) gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); else gspca_dev->last_packet_type = DISCARD_PACKET; return; case 0x51: /* end of frame */ ov51x_handle_button(gspca_dev, data[11] & 1); if (data[9] != 0) gspca_dev->last_packet_type = DISCARD_PACKET; gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); return; } } /* intermediate packet */ gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void ovfx2_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); /* A short read signals EOF */ if (len < gspca_dev->cam.bulk_size) { /* If the frame is short, and it is one of the first ones the sensor and bridge are still syncing, so drop it. 
*/ if (sd->first_frame) { sd->first_frame--; if (gspca_dev->image_len < sd->gspca_dev.pixfmt.width * sd->gspca_dev.pixfmt.height) gspca_dev->last_packet_type = DISCARD_PACKET; } gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); gspca_frame_add(gspca_dev, FIRST_PACKET, NULL, 0); } } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; switch (sd->bridge) { case BRIDGE_OV511: case BRIDGE_OV511PLUS: ov511_pkt_scan(gspca_dev, data, len); break; case BRIDGE_OV518: case BRIDGE_OV518PLUS: ov518_pkt_scan(gspca_dev, data, len); break; case BRIDGE_OV519: ov519_pkt_scan(gspca_dev, data, len); break; case BRIDGE_OVFX2: ovfx2_pkt_scan(gspca_dev, data, len); break; case BRIDGE_W9968CF: w9968cf_pkt_scan(gspca_dev, data, len); break; } } /* -- management routines -- */ static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; static const struct ov_i2c_regvals brit_7660[][7] = { {{0x0f, 0x6a}, {0x24, 0x40}, {0x25, 0x2b}, {0x26, 0x90}, {0x27, 0xe0}, {0x28, 0xe0}, {0x2c, 0xe0}}, {{0x0f, 0x6a}, {0x24, 0x50}, {0x25, 0x40}, {0x26, 0xa1}, {0x27, 0xc0}, {0x28, 0xc0}, {0x2c, 0xc0}}, {{0x0f, 0x6a}, {0x24, 0x68}, {0x25, 0x58}, {0x26, 0xc2}, {0x27, 0xa0}, {0x28, 0xa0}, {0x2c, 0xa0}}, {{0x0f, 0x6a}, {0x24, 0x70}, {0x25, 0x68}, {0x26, 0xd3}, {0x27, 0x80}, {0x28, 0x80}, {0x2c, 0x80}}, {{0x0f, 0x6a}, {0x24, 0x80}, {0x25, 0x70}, {0x26, 0xd3}, {0x27, 0x20}, {0x28, 0x20}, {0x2c, 0x20}}, {{0x0f, 0x6a}, {0x24, 0x88}, {0x25, 0x78}, {0x26, 0xd3}, {0x27, 0x40}, {0x28, 0x40}, {0x2c, 0x40}}, {{0x0f, 0x6a}, {0x24, 0x90}, {0x25, 0x80}, {0x26, 0xd4}, {0x27, 0x60}, {0x28, 0x60}, {0x2c, 0x60}} }; switch (sd->sensor) { case SEN_OV8610: case SEN_OV7610: case SEN_OV76BE: case SEN_OV6620: case SEN_OV6630: case SEN_OV66308AF: case SEN_OV7640: case SEN_OV7648: i2c_w(sd, OV7610_REG_BRT, val); break; case SEN_OV7620: case SEN_OV7620AE: i2c_w(sd, OV7610_REG_BRT, val); break; case SEN_OV7660: write_i2c_regvals(sd, brit_7660[val], ARRAY_SIZE(brit_7660[0])); break; case SEN_OV7670: /*win trace * i2c_w_mask(sd, OV7670_R13_COM8, 0, OV7670_COM8_AEC); */ i2c_w(sd, OV7670_R55_BRIGHT, ov7670_abs_to_sm(val)); break; } } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; static const struct ov_i2c_regvals contrast_7660[][31] = { {{0x6c, 0xf0}, {0x6d, 0xf0}, {0x6e, 0xf8}, {0x6f, 0xa0}, {0x70, 0x58}, {0x71, 0x38}, {0x72, 0x30}, {0x73, 0x30}, {0x74, 0x28}, {0x75, 0x28}, {0x76, 0x24}, {0x77, 0x24}, {0x78, 0x22}, {0x79, 0x28}, {0x7a, 0x2a}, {0x7b, 0x34}, {0x7c, 0x0f}, {0x7d, 0x1e}, {0x7e, 0x3d}, {0x7f, 0x65}, {0x80, 0x70}, {0x81, 0x77}, {0x82, 0x7d}, {0x83, 0x83}, {0x84, 0x88}, {0x85, 0x8d}, {0x86, 0x96}, {0x87, 0x9f}, {0x88, 0xb0}, {0x89, 0xc4}, {0x8a, 0xd9}}, {{0x6c, 0xf0}, {0x6d, 0xf0}, {0x6e, 0xf8}, {0x6f, 0x94}, {0x70, 0x58}, {0x71, 0x40}, {0x72, 0x30}, {0x73, 0x30}, {0x74, 0x30}, {0x75, 0x30}, {0x76, 0x2c}, {0x77, 0x24}, {0x78, 0x22}, {0x79, 0x28}, {0x7a, 0x2a}, {0x7b, 0x31}, {0x7c, 0x0f}, {0x7d, 0x1e}, {0x7e, 0x3d}, {0x7f, 0x62}, {0x80, 0x6d}, {0x81, 0x75}, {0x82, 0x7b}, {0x83, 0x81}, {0x84, 0x87}, {0x85, 0x8d}, {0x86, 0x98}, {0x87, 0xa1}, {0x88, 0xb2}, {0x89, 0xc6}, {0x8a, 0xdb}}, {{0x6c, 0xf0}, {0x6d, 0xf0}, {0x6e, 0xf0}, {0x6f, 0x84}, {0x70, 0x58}, {0x71, 0x48}, {0x72, 0x40}, {0x73, 0x40}, {0x74, 0x28}, {0x75, 0x28}, {0x76, 0x28}, {0x77, 0x24}, {0x78, 0x26}, {0x79, 0x28}, {0x7a, 0x28}, {0x7b, 0x34}, {0x7c, 0x0f}, {0x7d, 0x1e}, {0x7e, 0x3c}, 
{0x7f, 0x5d}, {0x80, 0x68}, {0x81, 0x71}, {0x82, 0x79}, {0x83, 0x81}, {0x84, 0x86}, {0x85, 0x8b}, {0x86, 0x95}, {0x87, 0x9e}, {0x88, 0xb1}, {0x89, 0xc5}, {0x8a, 0xd9}}, {{0x6c, 0xf0}, {0x6d, 0xf0}, {0x6e, 0xf0}, {0x6f, 0x70}, {0x70, 0x58}, {0x71, 0x58}, {0x72, 0x48}, {0x73, 0x48}, {0x74, 0x38}, {0x75, 0x40}, {0x76, 0x34}, {0x77, 0x34}, {0x78, 0x2e}, {0x79, 0x28}, {0x7a, 0x24}, {0x7b, 0x22}, {0x7c, 0x0f}, {0x7d, 0x1e}, {0x7e, 0x3c}, {0x7f, 0x58}, {0x80, 0x63}, {0x81, 0x6e}, {0x82, 0x77}, {0x83, 0x80}, {0x84, 0x87}, {0x85, 0x8f}, {0x86, 0x9c}, {0x87, 0xa9}, {0x88, 0xc0}, {0x89, 0xd4}, {0x8a, 0xe6}}, {{0x6c, 0xa0}, {0x6d, 0xf0}, {0x6e, 0x90}, {0x6f, 0x80}, {0x70, 0x70}, {0x71, 0x80}, {0x72, 0x60}, {0x73, 0x60}, {0x74, 0x58}, {0x75, 0x60}, {0x76, 0x4c}, {0x77, 0x38}, {0x78, 0x38}, {0x79, 0x2a}, {0x7a, 0x20}, {0x7b, 0x0e}, {0x7c, 0x0a}, {0x7d, 0x14}, {0x7e, 0x26}, {0x7f, 0x46}, {0x80, 0x54}, {0x81, 0x64}, {0x82, 0x70}, {0x83, 0x7c}, {0x84, 0x87}, {0x85, 0x93}, {0x86, 0xa6}, {0x87, 0xb4}, {0x88, 0xd0}, {0x89, 0xe5}, {0x8a, 0xf5}}, {{0x6c, 0x60}, {0x6d, 0x80}, {0x6e, 0x60}, {0x6f, 0x80}, {0x70, 0x80}, {0x71, 0x80}, {0x72, 0x88}, {0x73, 0x30}, {0x74, 0x70}, {0x75, 0x68}, {0x76, 0x64}, {0x77, 0x50}, {0x78, 0x3c}, {0x79, 0x22}, {0x7a, 0x10}, {0x7b, 0x08}, {0x7c, 0x06}, {0x7d, 0x0e}, {0x7e, 0x1a}, {0x7f, 0x3a}, {0x80, 0x4a}, {0x81, 0x5a}, {0x82, 0x6b}, {0x83, 0x7b}, {0x84, 0x89}, {0x85, 0x96}, {0x86, 0xaf}, {0x87, 0xc3}, {0x88, 0xe1}, {0x89, 0xf2}, {0x8a, 0xfa}}, {{0x6c, 0x20}, {0x6d, 0x40}, {0x6e, 0x20}, {0x6f, 0x60}, {0x70, 0x88}, {0x71, 0xc8}, {0x72, 0xc0}, {0x73, 0xb8}, {0x74, 0xa8}, {0x75, 0xb8}, {0x76, 0x80}, {0x77, 0x5c}, {0x78, 0x26}, {0x79, 0x10}, {0x7a, 0x08}, {0x7b, 0x04}, {0x7c, 0x02}, {0x7d, 0x06}, {0x7e, 0x0a}, {0x7f, 0x22}, {0x80, 0x33}, {0x81, 0x4c}, {0x82, 0x64}, {0x83, 0x7b}, {0x84, 0x90}, {0x85, 0xa7}, {0x86, 0xc7}, {0x87, 0xde}, {0x88, 0xf1}, {0x89, 0xf9}, {0x8a, 0xfd}}, }; switch (sd->sensor) { case SEN_OV7610: case SEN_OV6620: i2c_w(sd, OV7610_REG_CNT, val); break; case SEN_OV6630: case SEN_OV66308AF: i2c_w_mask(sd, OV7610_REG_CNT, val >> 4, 0x0f); break; case SEN_OV8610: { static const u8 ctab[] = { 0x03, 0x09, 0x0b, 0x0f, 0x53, 0x6f, 0x35, 0x7f }; /* Use Y gamma control instead. Bit 0 enables it. */ i2c_w(sd, 0x64, ctab[val >> 5]); break; } case SEN_OV7620: case SEN_OV7620AE: { static const u8 ctab[] = { 0x01, 0x05, 0x09, 0x11, 0x15, 0x35, 0x37, 0x57, 0x5b, 0xa5, 0xa7, 0xc7, 0xc9, 0xcf, 0xef, 0xff }; /* Use Y gamma control instead. Bit 0 enables it. 
*/ i2c_w(sd, 0x64, ctab[val >> 4]); break; } case SEN_OV7660: write_i2c_regvals(sd, contrast_7660[val], ARRAY_SIZE(contrast_7660[0])); break; case SEN_OV7670: /* check that this isn't just the same as ov7610 */ i2c_w(sd, OV7670_R56_CONTRAS, val >> 1); break; } } static void setexposure(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; i2c_w(sd, 0x10, val); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; static const struct ov_i2c_regvals colors_7660[][6] = { {{0x4f, 0x28}, {0x50, 0x2a}, {0x51, 0x02}, {0x52, 0x0a}, {0x53, 0x19}, {0x54, 0x23}}, {{0x4f, 0x47}, {0x50, 0x4a}, {0x51, 0x03}, {0x52, 0x11}, {0x53, 0x2c}, {0x54, 0x3e}}, {{0x4f, 0x66}, {0x50, 0x6b}, {0x51, 0x05}, {0x52, 0x19}, {0x53, 0x40}, {0x54, 0x59}}, {{0x4f, 0x84}, {0x50, 0x8b}, {0x51, 0x06}, {0x52, 0x20}, {0x53, 0x53}, {0x54, 0x73}}, {{0x4f, 0xa3}, {0x50, 0xab}, {0x51, 0x08}, {0x52, 0x28}, {0x53, 0x66}, {0x54, 0x8e}}, }; switch (sd->sensor) { case SEN_OV8610: case SEN_OV7610: case SEN_OV76BE: case SEN_OV6620: case SEN_OV6630: case SEN_OV66308AF: i2c_w(sd, OV7610_REG_SAT, val); break; case SEN_OV7620: case SEN_OV7620AE: /* Use UV gamma control instead. Bits 0 & 7 are reserved. */ /* rc = ov_i2c_write(sd->dev, 0x62, (val >> 9) & 0x7e); if (rc < 0) goto out; */ i2c_w(sd, OV7610_REG_SAT, val); break; case SEN_OV7640: case SEN_OV7648: i2c_w(sd, OV7610_REG_SAT, val & 0xf0); break; case SEN_OV7660: write_i2c_regvals(sd, colors_7660[val], ARRAY_SIZE(colors_7660[0])); break; case SEN_OV7670: /* supported later once I work out how to do it * transparently fail now! */ /* set REG_COM13 values for UV sat auto mode */ break; } } static void setautobright(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; i2c_w_mask(sd, 0x2d, val ? 0x10 : 0x00, 0x10); } static void setfreq_i(struct sd *sd, s32 val) { if (sd->sensor == SEN_OV7660 || sd->sensor == SEN_OV7670) { switch (val) { case 0: /* Banding filter disabled */ i2c_w_mask(sd, OV7670_R13_COM8, 0, OV7670_COM8_BFILT); break; case 1: /* 50 hz */ i2c_w_mask(sd, OV7670_R13_COM8, OV7670_COM8_BFILT, OV7670_COM8_BFILT); i2c_w_mask(sd, OV7670_R3B_COM11, 0x08, 0x18); break; case 2: /* 60 hz */ i2c_w_mask(sd, OV7670_R13_COM8, OV7670_COM8_BFILT, OV7670_COM8_BFILT); i2c_w_mask(sd, OV7670_R3B_COM11, 0x00, 0x18); break; case 3: /* Auto hz - ov7670 only */ i2c_w_mask(sd, OV7670_R13_COM8, OV7670_COM8_BFILT, OV7670_COM8_BFILT); i2c_w_mask(sd, OV7670_R3B_COM11, OV7670_COM11_HZAUTO, 0x18); break; } } else { switch (val) { case 0: /* Banding filter disabled */ i2c_w_mask(sd, 0x2d, 0x00, 0x04); i2c_w_mask(sd, 0x2a, 0x00, 0x80); break; case 1: /* 50 hz (filter on and framerate adj) */ i2c_w_mask(sd, 0x2d, 0x04, 0x04); i2c_w_mask(sd, 0x2a, 0x80, 0x80); /* 20 fps -> 16.667 fps */ if (sd->sensor == SEN_OV6620 || sd->sensor == SEN_OV6630 || sd->sensor == SEN_OV66308AF) i2c_w(sd, 0x2b, 0x5e); else i2c_w(sd, 0x2b, 0xac); break; case 2: /* 60 hz (filter on, ...) */ i2c_w_mask(sd, 0x2d, 0x04, 0x04); if (sd->sensor == SEN_OV6620 || sd->sensor == SEN_OV6630 || sd->sensor == SEN_OV66308AF) { /* 20 fps -> 15 fps */ i2c_w_mask(sd, 0x2a, 0x80, 0x80); i2c_w(sd, 0x2b, 0xa8); } else { /* no framerate adj. 
*/ i2c_w_mask(sd, 0x2a, 0x00, 0x80); } break; } } } static void setfreq(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; setfreq_i(sd, val); /* Ugly but necessary */ if (sd->bridge == BRIDGE_W9968CF) w9968cf_set_crop_window(sd); } static int sd_get_jcomp(struct gspca_dev *gspca_dev, struct v4l2_jpegcompression *jcomp) { struct sd *sd = (struct sd *) gspca_dev; if (sd->bridge != BRIDGE_W9968CF) return -ENOTTY; memset(jcomp, 0, sizeof *jcomp); jcomp->quality = v4l2_ctrl_g_ctrl(sd->jpegqual); jcomp->jpeg_markers = V4L2_JPEG_MARKER_DHT | V4L2_JPEG_MARKER_DQT | V4L2_JPEG_MARKER_DRI; return 0; } static int sd_set_jcomp(struct gspca_dev *gspca_dev, const struct v4l2_jpegcompression *jcomp) { struct sd *sd = (struct sd *) gspca_dev; if (sd->bridge != BRIDGE_W9968CF) return -ENOTTY; v4l2_ctrl_s_ctrl(sd->jpegqual, jcomp->quality); return 0; } static int sd_g_volatile_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; switch (ctrl->id) { case V4L2_CID_AUTOGAIN: gspca_dev->exposure->val = i2c_r(sd, 0x10); break; } return 0; } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_POWER_LINE_FREQUENCY: setfreq(gspca_dev, ctrl->val); break; case V4L2_CID_AUTOBRIGHTNESS: if (ctrl->is_new) setautobright(gspca_dev, ctrl->val); if (!ctrl->val && sd->brightness->is_new) setbrightness(gspca_dev, sd->brightness->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_HFLIP: sethvflip(gspca_dev, ctrl->val, sd->vflip->val); break; case V4L2_CID_AUTOGAIN: if (ctrl->is_new) setautogain(gspca_dev, ctrl->val); if (!ctrl->val && gspca_dev->exposure->is_new) setexposure(gspca_dev, gspca_dev->exposure->val); break; case V4L2_CID_JPEG_COMPRESSION_QUALITY: return -EBUSY; /* Should never happen, as we grab the ctrl */ } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .g_volatile_ctrl = sd_g_volatile_ctrl, .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 10); if (valid_controls[sd->sensor].has_brightness) sd->brightness = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, sd->sensor == SEN_OV7660 ? 6 : 255, 1, sd->sensor == SEN_OV7660 ? 3 : 127); if (valid_controls[sd->sensor].has_contrast) { if (sd->sensor == SEN_OV7660) v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 6, 1, 3); else v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, (sd->sensor == SEN_OV6630 || sd->sensor == SEN_OV66308AF) ? 200 : 127); } if (valid_controls[sd->sensor].has_sat) v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, sd->sensor == SEN_OV7660 ? 4 : 255, 1, sd->sensor == SEN_OV7660 ? 
2 : 127); if (valid_controls[sd->sensor].has_exposure) gspca_dev->exposure = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_EXPOSURE, 0, 255, 1, 127); if (valid_controls[sd->sensor].has_hvflip) { sd->hflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); sd->vflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); } if (valid_controls[sd->sensor].has_autobright) sd->autobright = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOBRIGHTNESS, 0, 1, 1, 1); if (valid_controls[sd->sensor].has_autogain) gspca_dev->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (valid_controls[sd->sensor].has_freq) { if (sd->sensor == SEN_OV7670) sd->freq = v4l2_ctrl_new_std_menu(hdl, &sd_ctrl_ops, V4L2_CID_POWER_LINE_FREQUENCY, V4L2_CID_POWER_LINE_FREQUENCY_AUTO, 0, V4L2_CID_POWER_LINE_FREQUENCY_AUTO); else sd->freq = v4l2_ctrl_new_std_menu(hdl, &sd_ctrl_ops, V4L2_CID_POWER_LINE_FREQUENCY, V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0, 0); } if (sd->bridge == BRIDGE_W9968CF) sd->jpegqual = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_JPEG_COMPRESSION_QUALITY, QUALITY_MIN, QUALITY_MAX, 1, QUALITY_DEF); if (hdl->error) { gspca_err(gspca_dev, "Could not initialize controls\n"); return hdl->error; } if (gspca_dev->autogain) v4l2_ctrl_auto_cluster(3, &gspca_dev->autogain, 0, true); if (sd->autobright) v4l2_ctrl_auto_cluster(2, &sd->autobright, 0, false); if (sd->hflip) v4l2_ctrl_cluster(2, &sd->hflip); return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_isoc_init, .start = sd_start, .stopN = sd_stopN, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, .dq_callback = sd_reset_snapshot, .get_jcomp = sd_get_jcomp, .set_jcomp = sd_set_jcomp, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x4003), .driver_info = BRIDGE_W9968CF }, {USB_DEVICE(0x041e, 0x4052), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, {USB_DEVICE(0x041e, 0x405f), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4060), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4061), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4064), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4067), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x041e, 0x4068), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x045e, 0x028c), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, {USB_DEVICE(0x054c, 0x0154), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x054c, 0x0155), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x05a9, 0x0511), .driver_info = BRIDGE_OV511 }, {USB_DEVICE(0x05a9, 0x0518), .driver_info = BRIDGE_OV518 }, {USB_DEVICE(0x05a9, 0x0519), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, {USB_DEVICE(0x05a9, 0x0530), .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED }, {USB_DEVICE(0x05a9, 0x2800), .driver_info = BRIDGE_OVFX2 }, {USB_DEVICE(0x05a9, 0x4519), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x05a9, 0x8519), .driver_info = BRIDGE_OV519 }, {USB_DEVICE(0x05a9, 0xa511), .driver_info = BRIDGE_OV511PLUS }, {USB_DEVICE(0x05a9, 0xa518), .driver_info = BRIDGE_OV518PLUS }, {USB_DEVICE(0x0813, 0x0002), .driver_info = BRIDGE_OV511PLUS }, {USB_DEVICE(0x0b62, 0x0059), .driver_info = BRIDGE_OVFX2 }, {USB_DEVICE(0x0e96, 0xc001), .driver_info = BRIDGE_OVFX2 }, {USB_DEVICE(0x1046, 0x9967), .driver_info = BRIDGE_W9968CF }, {USB_DEVICE(0x8020, 0xef04), .driver_info 
= BRIDGE_OVFX2 }, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); module_param(frame_rate, int, 0644); MODULE_PARM_DESC(frame_rate, "Frame rate (5, 10, 15, 20 or 30 fps)");
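The SOF/EOF framing that ov511_pkt_scan() relies on is compact enough to restate on its own: a marker packet starts with eight zero bytes, byte 8 carries the flag bits (bit 7 = EOF, bit 3 always set, bit 2 = snapshot button), and an EOF packet encodes (width/8 - 1) and (height/8 - 1) in bytes 9 and 10. Below is a minimal, self-contained user-space sketch of the same header checks; the struct and function names are illustrative, not part of the driver.

/*
 * Sketch of the OV511 SOF/EOF detection performed in ov511_pkt_scan():
 * bytes 0-7 of a marker packet are zero, byte 8 carries the flags, and
 * an EOF packet encodes (width/8 - 1) and (height/8 - 1) in bytes 9
 * and 10. Names here are illustrative, not part of the driver.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ov511_frame_info {
	bool eof;            /* frame end (vs. frame start) */
	bool button;         /* snapshot button state */
	unsigned int width;  /* valid only when eof is true */
	unsigned int height; /* valid only when eof is true */
};

/* Returns true if the packet is a SOF/EOF marker and fills @info. */
static bool ov511_parse_marker(const uint8_t *pkt, size_t len,
			       struct ov511_frame_info *info)
{
	static const uint8_t zeros[8];

	if (len < 11 || memcmp(pkt, zeros, 8) != 0 || !(pkt[8] & 0x08))
		return false;	/* ordinary image data packet */

	info->eof = pkt[8] & 0x80;
	info->button = (pkt[8] >> 2) & 1;
	info->width = (pkt[9] + 1) * 8;
	info->height = (pkt[10] + 1) * 8;
	return true;
}

int main(void)
{
	uint8_t eof_pkt[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0x88, 79, 59 };
	struct ov511_frame_info info;

	if (ov511_parse_marker(eof_pkt, sizeof(eof_pkt), &info))
		printf("EOF=%d button=%d %ux%u\n", info.eof, info.button,
		       info.width, info.height);
	return 0;
}

Compiled stand-alone, the sample EOF packet decodes to 640x480, which is exactly the size comparison the driver performs before accepting a frame.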
// SPDX-License-Identifier: GPL-2.0+ /* * c6xdigio.c * Hardware driver for Mechatronic Systems Inc. C6x_DIGIO DSP daughter card. * http://web.archive.org/web/%2A/http://robot0.ge.uiuc.edu/~spong/mecha/ * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1999 Dan Block */ /* * Driver: c6xdigio * Description: Mechatronic Systems Inc. C6x_DIGIO DSP daughter card * Author: Dan Block * Status: unknown * Devices: [Mechatronic Systems Inc.] C6x_DIGIO DSP daughter card (c6xdigio) * Updated: Sun Nov 20 20:18:34 EST 2005 * * Configuration Options: * [0] - base address */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/timex.h> #include <linux/timer.h> #include <linux/io.h> #include <linux/pnp.h> #include <linux/comedi/comedidev.h> /* * Register I/O map */ #define C6XDIGIO_DATA_REG 0x00 #define C6XDIGIO_DATA_CHAN(x) (((x) + 1) << 4) #define C6XDIGIO_DATA_PWM BIT(5) #define C6XDIGIO_DATA_ENCODER BIT(6) #define C6XDIGIO_STATUS_REG 0x01 #define C6XDIGIO_CTRL_REG 0x02 #define C6XDIGIO_TIME_OUT 20 static int c6xdigio_chk_status(struct comedi_device *dev, unsigned long context) { unsigned int status; int timeout = 0; do { status = inb(dev->iobase + C6XDIGIO_STATUS_REG); if ((status & 0x80) != context) return 0; timeout++; } while (timeout < C6XDIGIO_TIME_OUT); return -EBUSY; } static int c6xdigio_write_data(struct comedi_device *dev, unsigned int val, unsigned int status) { outb_p(val, dev->iobase + C6XDIGIO_DATA_REG); return c6xdigio_chk_status(dev, status); } static int c6xdigio_get_encoder_bits(struct comedi_device *dev, unsigned int *bits, unsigned int cmd, unsigned int status) { unsigned int val; val = inb(dev->iobase + C6XDIGIO_STATUS_REG); val >>= 3; val &= 0x07; *bits = val; return c6xdigio_write_data(dev, cmd, status); } static void c6xdigio_pwm_write(struct comedi_device *dev, unsigned int chan, unsigned int val) { unsigned int cmd = C6XDIGIO_DATA_PWM | C6XDIGIO_DATA_CHAN(chan); unsigned int bits; if (val > 498) val = 498; if (val < 2) val = 2; bits = (val >> 0) & 0x03; c6xdigio_write_data(dev, cmd | bits | (0 << 2), 0x00); bits = (val >> 2) & 0x03; c6xdigio_write_data(dev, cmd | bits | (1 << 2), 0x80); bits = (val >> 4) & 0x03; c6xdigio_write_data(dev, cmd | bits |
(0 << 2), 0x00); bits = (val >> 6) & 0x03; c6xdigio_write_data(dev, cmd | bits | (1 << 2), 0x80); bits = (val >> 8) & 0x03; c6xdigio_write_data(dev, cmd | bits | (0 << 2), 0x00); c6xdigio_write_data(dev, 0x00, 0x80); } static int c6xdigio_encoder_read(struct comedi_device *dev, unsigned int chan) { unsigned int cmd = C6XDIGIO_DATA_ENCODER | C6XDIGIO_DATA_CHAN(chan); unsigned int val = 0; unsigned int bits; c6xdigio_write_data(dev, cmd, 0x00); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 0); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 3); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 6); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 9); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 12); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 15); c6xdigio_get_encoder_bits(dev, &bits, cmd | (1 << 2), 0x80); val |= (bits << 18); c6xdigio_get_encoder_bits(dev, &bits, cmd | (0 << 2), 0x00); val |= (bits << 21); c6xdigio_write_data(dev, 0x00, 0x80); return val; } static int c6xdigio_pwm_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val = (s->state >> (16 * chan)) & 0xffff; int i; for (i = 0; i < insn->n; i++) { val = data[i]; c6xdigio_pwm_write(dev, chan, val); } /* * There are only 2 PWM channels and they have a maxdata of 500. * Instead of allocating private data to save the values in for * readback this driver just packs the values for the two channels * in the s->state. The channel's old bits must be cleared before * the new value is OR'ed in. */ s->state &= ~(0xffff << (16 * chan)); s->state |= (val << (16 * chan)); return insn->n; } static int c6xdigio_pwm_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; val = (s->state >> (16 * chan)) & 0xffff; for (i = 0; i < insn->n; i++) data[i] = val; return insn->n; } static int c6xdigio_encoder_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); unsigned int val; int i; for (i = 0; i < insn->n; i++) { val = c6xdigio_encoder_read(dev, chan); /* munge two's complement value to offset binary */ data[i] = comedi_offset_munge(s, val); } return insn->n; } static void c6xdigio_init(struct comedi_device *dev) { /* Initialize the PWM */ c6xdigio_write_data(dev, 0x70, 0x00); c6xdigio_write_data(dev, 0x74, 0x80); c6xdigio_write_data(dev, 0x70, 0x00); c6xdigio_write_data(dev, 0x00, 0x80); /* Reset the encoders */ c6xdigio_write_data(dev, 0x68, 0x00); c6xdigio_write_data(dev, 0x6c, 0x80); c6xdigio_write_data(dev, 0x68, 0x00); c6xdigio_write_data(dev, 0x00, 0x80); } static const struct pnp_device_id c6xdigio_pnp_tbl[] = { /* Standard LPT Printer Port */ {.id = "PNP0400", .driver_data = 0}, /* ECP Printer Port */ {.id = "PNP0401", .driver_data = 0}, {} }; static struct pnp_driver c6xdigio_pnp_driver = { .name = "c6xdigio", .id_table = c6xdigio_pnp_tbl, }; static int c6xdigio_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 0x03); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; s = &dev->subdevices[0]; /* pwm output subdevice */ s->type = COMEDI_SUBD_PWM; s->subdev_flags =
SDF_WRITABLE; s->n_chan = 2; s->maxdata = 500; s->range_table = &range_unknown; s->insn_write = c6xdigio_pwm_insn_write; s->insn_read = c6xdigio_pwm_insn_read; s = &dev->subdevices[1]; /* encoder (counter) subdevice */ s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_READABLE | SDF_LSAMPL; s->n_chan = 2; s->maxdata = 0xffffff; s->range_table = &range_unknown; s->insn_read = c6xdigio_encoder_insn_read; /* I will call this init anyway but more than likely the DSP board */ /* will not be connected when device driver is loaded. */ c6xdigio_init(dev); return 0; } static struct comedi_driver c6xdigio_driver = { .driver_name = "c6xdigio", .module = THIS_MODULE, .attach = c6xdigio_attach, .detach = comedi_legacy_detach, }; static bool c6xdigio_pnp_registered = false; static int __init c6xdigio_module_init(void) { int ret; ret = comedi_driver_register(&c6xdigio_driver); if (ret) return ret; if (IS_ENABLED(CONFIG_PNP)) { /* Try to activate the PnP ports */ ret = pnp_register_driver(&c6xdigio_pnp_driver); if (ret) { pr_warn("failed to register pnp driver - err %d\n", ret); ret = 0; /* ignore the error. */ } else { c6xdigio_pnp_registered = true; } } return 0; } module_init(c6xdigio_module_init); static void __exit c6xdigio_module_exit(void) { if (c6xdigio_pnp_registered) pnp_unregister_driver(&c6xdigio_pnp_driver); comedi_driver_unregister(&c6xdigio_driver); } module_exit(c6xdigio_module_exit); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for the C6x_DIGIO DSP daughter card"); MODULE_LICENSE("GPL");
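The readback trick in c6xdigio_pwm_insn_write() deserves a note: with only two channels and a maxdata of 500, both last-written values fit in the 32-bit s->state word, channel 0 in the low 16 bits and channel 1 in the high 16 bits. For the packing to behave as a store rather than an accumulate, the channel's old bits have to be cleared with an inverted mask before the new value is OR'ed in (the corrected line above). A standalone sketch of the same packing; names are illustrative:

/*
 * Sketch of the two-channel readback packing used by
 * c6xdigio_pwm_insn_write(): channel N occupies bits [16N, 16N+15].
 */
#include <assert.h>
#include <stdint.h>

static uint32_t pwm_state_set(uint32_t state, unsigned int chan, uint16_t val)
{
	state &= ~((uint32_t)0xffff << (16 * chan)); /* clear old value */
	state |= (uint32_t)val << (16 * chan);       /* store new value */
	return state;
}

static uint16_t pwm_state_get(uint32_t state, unsigned int chan)
{
	return (state >> (16 * chan)) & 0xffff;
}

int main(void)
{
	uint32_t state = 0;

	state = pwm_state_set(state, 0, 100);
	state = pwm_state_set(state, 1, 200);
	state = pwm_state_set(state, 0, 300); /* overwrite, not OR */
	assert(pwm_state_get(state, 0) == 300);
	assert(pwm_state_get(state, 1) == 200);
	return 0;
}

Without the inverted mask, writing one channel would clobber the other channel's saved value and repeated writes to the same channel would OR old and new values together.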
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2008 IBM Corporation * * Authors: * Mimi Zohar <zohar@us.ibm.com> * * File: integrity_iint.c * - initialize the integrity directory in securityfs * - load IMA and EVM keys */ #include <linux/security.h> #include "integrity.h" struct dentry *integrity_dir; /* * integrity_kernel_read - read data from the file * * This is a function for reading file content instead of kernel_read(). * It does not perform locking checks to ensure it cannot be blocked. * It does not perform security checks because it is irrelevant for IMA. * */ int integrity_kernel_read(struct file *file, loff_t offset, void *addr, unsigned long count) { return __kernel_read(file, addr, count, &offset); } /* * integrity_load_keys - load integrity keys hook * * This hook is called from init/main.c:kernel_init_freeable() * when rootfs is ready */ void __init integrity_load_keys(void) { ima_load_x509(); if (!IS_ENABLED(CONFIG_IMA_LOAD_X509)) evm_load_x509(); } int __init integrity_fs_init(void) { if (integrity_dir) return 0; integrity_dir = securityfs_create_dir("integrity", NULL); if (IS_ERR(integrity_dir)) { int ret = PTR_ERR(integrity_dir); if (ret != -ENODEV) pr_err("Unable to create integrity sysfs dir: %d\n", ret); integrity_dir = NULL; return ret; } return 0; } void __init integrity_fs_fini(void) { if (!integrity_dir || !simple_empty(integrity_dir)) return; securityfs_remove(integrity_dir); integrity_dir = NULL; }
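Note how integrity_kernel_read() hands __kernel_read() a pointer to its own local loff_t: the file's position (f_pos) is never advanced, so the read does not disturb other users of the same struct file. The closest user-space analogue is pread(2); a short sketch, with an arbitrary example path:

/* Positioned read that leaves the descriptor's file offset untouched,
 * analogous to integrity_kernel_read() passing a local loff_t. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/etc/os-release", O_RDONLY); /* arbitrary file */

	if (fd < 0)
		return 1;
	/* Read 63 bytes at offset 0; fd's file position stays at 0. */
	n = pread(fd, buf, sizeof(buf) - 1, 0);
	if (n > 0) {
		buf[n] = '\0';
		printf("%s", buf);
	}
	close(fd);
	return 0;
}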
// SPDX-License-Identifier: GPL-2.0-only /* * fs/libfs.c * Library for filesystem writers. */ #include <linux/blkdev.h> #include <linux/export.h> #include <linux/filelock.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/mount.h> #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> #include <linux/namei.h> #include <linux/exportfs.h> #include <linux/iversion.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* sync_mapping_buffers */ #include <linux/fs_context.h> #include <linux/pseudo_fs.h> #include <linux/fsnotify.h> #include <linux/unicode.h> #include <linux/fscrypt.h> #include <linux/pidfs.h> #include <linux/uaccess.h> #include "internal.h" int simple_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; } EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { u64 id = huge_encode_dev(dentry->d_sb->s_dev); buf->f_fsid = u64_to_fsid(id); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = PAGE_SIZE; buf->f_namelen = NAME_MAX; return 0; } EXPORT_SYMBOL(simple_statfs); /* * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ int always_delete_dentry(const struct dentry *dentry) { return 1; } EXPORT_SYMBOL(always_delete_dentry); /* * Lookup the data. This is trivial - if the dentry didn't already * exist, we know it is negative. Set DCACHE_DONTCACHE so negative * dentries are deleted immediately. */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_op && !(dentry->d_flags & DCACHE_DONTCACHE)) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_DONTCACHE; spin_unlock(&dentry->d_lock); } if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) return NULL; d_add(dentry, NULL); return NULL; } EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } EXPORT_SYMBOL(dcache_dir_open); int dcache_dir_close(struct inode *inode, struct file *file) { dput(file->private_data); return 0; } EXPORT_SYMBOL(dcache_dir_close); /* parent is locked at least shared */ /* * Returns an element of siblings' list. * We are looking for <count>th positive after <p>; if * found, dentry is grabbed and returned to caller. * If no such element exists, NULL is returned.
*/ static struct dentry *scan_positives(struct dentry *cursor, struct hlist_node **p, loff_t count, struct dentry *last) { struct dentry *dentry = cursor->d_parent, *found = NULL; spin_lock(&dentry->d_lock); while (*p) { struct dentry *d = hlist_entry(*p, struct dentry, d_sib); p = &d->d_sib.next; // we must at least skip cursors, to avoid livelocks if (d->d_flags & DCACHE_DENTRY_CURSOR) continue; if (simple_positive(d) && !--count) { spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(d)) found = dget_dlock(d); spin_unlock(&d->d_lock); if (likely(found)) break; count = 1; } if (need_resched()) { if (!hlist_unhashed(&cursor->d_sib)) __hlist_del(&cursor->d_sib); hlist_add_behind(&cursor->d_sib, &d->d_sib); p = &cursor->d_sib.next; spin_unlock(&dentry->d_lock); cond_resched(); spin_lock(&dentry->d_lock); } } spin_unlock(&dentry->d_lock); dput(last); return found; } loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; switch (whence) { case 1: offset += file->f_pos; fallthrough; case 0: if (offset >= 0) break; fallthrough; default: return -EINVAL; } if (offset != file->f_pos) { struct dentry *cursor = file->private_data; struct dentry *to = NULL; inode_lock_shared(dentry->d_inode); if (offset > 2) to = scan_positives(cursor, &dentry->d_children.first, offset - 2, NULL); spin_lock(&dentry->d_lock); hlist_del_init(&cursor->d_sib); if (to) hlist_add_behind(&cursor->d_sib, &to->d_sib); spin_unlock(&dentry->d_lock); dput(to); file->f_pos = offset; inode_unlock_shared(dentry->d_inode); } return offset; } EXPORT_SYMBOL(dcache_dir_lseek); /* * Directory is locked and all positive dentries in it are safe, since * for ramfs-type trees they can't go away without unlink() or rmdir(), * both impossible due to the lock on directory. 
*/ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; struct dentry *next = NULL; struct hlist_node **p; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) p = &dentry->d_children.first; else p = &cursor->d_sib.next; while ((next = scan_positives(cursor, p, 1, next)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, fs_umode_to_dtype(d_inode(next)->i_mode))) break; ctx->pos++; p = &next->d_sib.next; } spin_lock(&dentry->d_lock); hlist_del_init(&cursor->d_sib); if (next) hlist_add_before(&cursor->d_sib, &next->d_sib); spin_unlock(&dentry->d_lock); dput(next); return 0; } EXPORT_SYMBOL(dcache_readdir); ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; } EXPORT_SYMBOL(generic_read_dir); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, .iterate_shared = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL(simple_dir_operations); const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; EXPORT_SYMBOL(simple_dir_inode_operations); /* simple_offset_add() never assigns these to a dentry */ enum { DIR_OFFSET_FIRST = 2, /* Find first real entry */ DIR_OFFSET_EOD = S32_MAX, }; /* simple_offset_add() allocation range */ enum { DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1, DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1, }; static void offset_set(struct dentry *dentry, long offset) { dentry->d_fsdata = (void *)offset; } static long dentry2offset(struct dentry *dentry) { return (long)dentry->d_fsdata; } static struct lock_class_key simple_offset_lock_class; /** * simple_offset_init - initialize an offset_ctx * @octx: directory offset map to be initialized * */ void simple_offset_init(struct offset_ctx *octx) { mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE); lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class); octx->next_offset = DIR_OFFSET_MIN; } /** * simple_offset_add - Add an entry to a directory's offset map * @octx: directory offset ctx to be updated * @dentry: new dentry being added * * Returns zero on success. @octx and the dentry's offset are updated. * Otherwise, a negative errno value is returned. */ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) { unsigned long offset; int ret; if (dentry2offset(dentry) != 0) return -EBUSY; ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN, DIR_OFFSET_MAX, &octx->next_offset, GFP_KERNEL); if (unlikely(ret < 0)) return ret == -EBUSY ? 
-ENOSPC : ret; offset_set(dentry, offset); return 0; } static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry, long offset) { int ret; ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL); if (ret) return ret; offset_set(dentry, offset); return 0; } /** * simple_offset_remove - Remove an entry from a directory's offset map * @octx: directory offset ctx to be updated * @dentry: dentry being removed * */ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) { long offset; offset = dentry2offset(dentry); if (offset == 0) return; mtree_erase(&octx->mt, offset); offset_set(dentry, 0); } /** * simple_offset_rename - handle directory offsets for rename * @old_dir: parent directory of source entry * @old_dentry: dentry of source entry * @new_dir: parent directory of destination entry * @new_dentry: dentry of destination * * Caller provides appropriate serialization. * * User space expects the directory offset value of the replaced * (new) directory entry to be unchanged after a rename. * * Caller must have grabbed a slot for new_dentry in the maple_tree * associated with new_dir, even if dentry is negative. */ void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); long new_offset = dentry2offset(new_dentry); if (WARN_ON(!new_offset)) return; simple_offset_remove(old_ctx, old_dentry); offset_set(new_dentry, 0); WARN_ON(simple_offset_replace(new_ctx, old_dentry, new_offset)); } /** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved * @old_dentry: dentry being moved * @new_dir: destination parent * @new_dentry: destination dentry * * This API preserves the directory offset values. Caller provides * appropriate serialization. * * Returns zero on success. Otherwise a negative errno is returned and the * rename is rolled back. */ int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); long old_index = dentry2offset(old_dentry); long new_index = dentry2offset(new_dentry); int ret; if (WARN_ON(!old_index || !new_index)) return -EINVAL; ret = mtree_store(&new_ctx->mt, new_index, old_dentry, GFP_KERNEL); if (WARN_ON(ret)) return ret; ret = mtree_store(&old_ctx->mt, old_index, new_dentry, GFP_KERNEL); if (WARN_ON(ret)) { mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); return ret; } offset_set(old_dentry, new_index); offset_set(new_dentry, old_index); simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); return 0; } /** * simple_offset_destroy - Release offset map * @octx: directory offset ctx that is about to be destroyed * * During fs teardown (e.g. umount), a directory's offset map might still * contain entries. mtree_destroy() cleans out anything that remains. */ void simple_offset_destroy(struct offset_ctx *octx) { mtree_destroy(&octx->mt); } /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated * @offset: a byte offset * @whence: enumerator describing the starting position for this update * * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
* * Returns the updated read position if successful; otherwise a * negative errno is returned and the read position remains unchanged. */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { switch (whence) { case SEEK_CUR: offset += file->f_pos; fallthrough; case SEEK_SET: if (offset >= 0) break; fallthrough; default: return -EINVAL; } return vfs_setpos(file, offset, LONG_MAX); } static struct dentry *find_positive_dentry(struct dentry *parent, struct dentry *dentry, bool next) { struct dentry *found = NULL; spin_lock(&parent->d_lock); if (next) dentry = d_next_sibling(dentry); else if (!dentry) dentry = d_first_child(parent); hlist_for_each_entry_from(dentry, d_sib) { if (!simple_positive(dentry)) continue; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(dentry)) found = dget_dlock(dentry); spin_unlock(&dentry->d_lock); if (likely(found)) break; } spin_unlock(&parent->d_lock); return found; } static noinline_for_stack struct dentry * offset_dir_lookup(struct dentry *parent, loff_t offset) { struct inode *inode = d_inode(parent); struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *child, *found = NULL; MA_STATE(mas, &octx->mt, offset, offset); if (offset == DIR_OFFSET_FIRST) found = find_positive_dentry(parent, NULL, false); else { rcu_read_lock(); child = mas_find_rev(&mas, DIR_OFFSET_MIN); found = find_positive_dentry(parent, child, false); rcu_read_unlock(); } return found; } static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) { struct inode *inode = d_inode(dentry); return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } static void offset_iterate_dir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; struct dentry *dentry; dentry = offset_dir_lookup(dir, ctx->pos); if (!dentry) goto out_eod; while (true) { struct dentry *next; ctx->pos = dentry2offset(dentry); if (!offset_dir_emit(ctx, dentry)) break; next = find_positive_dentry(dir, dentry, true); dput(dentry); if (!next) goto out_eod; dentry = next; } dput(dentry); return; out_eod: ctx->pos = DIR_OFFSET_EOD; } /** * offset_readdir - Emit entries starting at offset @ctx->pos * @file: an open directory to iterate over * @ctx: directory iteration context * * Caller must hold @file's i_rwsem to prevent insertion or removal of * entries during this call. * * On entry, @ctx->pos contains an offset that represents the first entry * to be read from the directory. * * The operation continues until there are no more entries to read, or * until the ctx->actor indicates there is no more space in the caller's * output buffer. * * On return, @ctx->pos contains an offset that will read the next entry * in this directory when offset_readdir() is called again with @ctx. * Caller places this value in the d_off field of the last entry in the * user's buffer. 
* * Return values: * %0 - Complete */ static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos != DIR_OFFSET_EOD) offset_iterate_dir(file, ctx); return 0; } const struct file_operations simple_offset_dir_operations = { .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, .fsync = noop_fsync, .setlease = generic_setlease, }; struct dentry *find_next_child(struct dentry *parent, struct dentry *prev) { struct dentry *child = NULL, *d; spin_lock(&parent->d_lock); d = prev ? d_next_sibling(prev) : d_first_child(parent); hlist_for_each_entry_from(d, d_sib) { if (simple_positive(d)) { spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(d)) child = dget_dlock(d); spin_unlock(&d->d_lock); if (likely(child)) break; } } spin_unlock(&parent->d_lock); dput(prev); return child; } EXPORT_SYMBOL(find_next_child); static void __simple_recursive_removal(struct dentry *dentry, void (*callback)(struct dentry *), bool locked) { struct dentry *this = dget(dentry); while (true) { struct dentry *victim = NULL, *child; struct inode *inode = this->d_inode; inode_lock_nested(inode, I_MUTEX_CHILD); if (d_is_dir(this)) inode->i_flags |= S_DEAD; while ((child = find_next_child(this, victim)) == NULL) { // kill and ascend // update metadata while it's still locked inode_set_ctime_current(inode); clear_nlink(inode); inode_unlock(inode); victim = this; this = this->d_parent; inode = this->d_inode; if (!locked || victim != dentry) inode_lock_nested(inode, I_MUTEX_CHILD); if (simple_positive(victim)) { d_invalidate(victim); // avoid lost mounts if (callback) callback(victim); fsnotify_delete(inode, d_inode(victim), victim); d_make_discardable(victim); } if (victim == dentry) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (d_is_dir(dentry)) drop_nlink(inode); if (!locked) inode_unlock(inode); dput(dentry); return; } } inode_unlock(inode); this = child; } } void simple_recursive_removal(struct dentry *dentry, void (*callback)(struct dentry *)) { return __simple_recursive_removal(dentry, callback, false); } EXPORT_SYMBOL(simple_recursive_removal); void simple_remove_by_name(struct dentry *parent, const char *name, void (*callback)(struct dentry *)) { struct dentry *dentry; dentry = lookup_noperm_positive_unlocked(&QSTR(name), parent); if (!IS_ERR(dentry)) { simple_recursive_removal(dentry, callback); dput(dentry); // paired with lookup_noperm_positive_unlocked() } } EXPORT_SYMBOL(simple_remove_by_name); /* caller holds parent directory with I_MUTEX_PARENT */ void locked_recursive_removal(struct dentry *dentry, void (*callback)(struct dentry *)) { return __simple_recursive_removal(dentry, callback, true); } EXPORT_SYMBOL(locked_recursive_removal); static const struct super_operations simple_super_operations = { .statfs = simple_statfs, }; static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) { struct pseudo_fs_context *ctx = fc->fs_private; struct inode *root; s->s_maxbytes = MAX_LFS_FILESIZE; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = ctx->magic; s->s_op = ctx->ops ?: &simple_super_operations; s->s_export_op = ctx->eops; s->s_xattr = ctx->xattr; s->s_time_gran = 1; s->s_d_flags |= ctx->s_d_flags; root = new_inode(s); if (!root) return -ENOMEM; /* * since this is the first inode, make it number 1. 
New inodes created * after this must take care not to collide with it (by passing * max_reserved of 1 to iunique). */ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; simple_inode_init_ts(root); s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; set_default_d_op(s, ctx->dops); return 0; } static int pseudo_fs_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, pseudo_fs_fill_super); } static void pseudo_fs_free(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations pseudo_fs_context_ops = { .free = pseudo_fs_free, .get_tree = pseudo_fs_get_tree, }; /* * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ struct pseudo_fs_context *init_pseudo(struct fs_context *fc, unsigned long magic) { struct pseudo_fs_context *ctx; ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL); if (likely(ctx)) { ctx->magic = magic; fc->fs_private = ctx; fc->ops = &pseudo_fs_context_ops; fc->sb_flags |= SB_NOUSER; fc->global = true; } return ctx; } EXPORT_SYMBOL(init_pseudo); int simple_open(struct inode *inode, struct file *file) { if (inode->i_private) file->private_data = inode->i_private; return 0; } EXPORT_SYMBOL(simple_open); int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); d_make_persistent(dentry, inode); return 0; } EXPORT_SYMBOL(simple_link); int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; spin_lock(&dentry->d_lock); hlist_for_each_entry(child, &dentry->d_children, d_sib) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { spin_unlock(&child->d_lock); goto out; } spin_unlock(&child->d_lock); } ret = 1; out: spin_unlock(&dentry->d_lock); return ret; } EXPORT_SYMBOL(simple_empty); void __simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); } EXPORT_SYMBOL(__simple_unlink); void __simple_rmdir(struct inode *dir, struct dentry *dentry) { drop_nlink(d_inode(dentry)); __simple_unlink(dir, dentry); drop_nlink(dir); } EXPORT_SYMBOL(__simple_rmdir); int simple_unlink(struct inode *dir, struct dentry *dentry) { __simple_unlink(dir, dentry); d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_unlink); int simple_rmdir(struct inode *dir, struct dentry *dentry) { if (!simple_empty(dentry)) return -ENOTEMPTY; __simple_rmdir(dir, dentry); d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_rmdir); /** * simple_rename_timestamp - update the various inode timestamps for rename * @old_dir: old parent directory * @old_dentry: dentry that is being renamed * @new_dir: new parent directory * @new_dentry: target for rename * * POSIX mandates that the old and new parent directories have their ctime and * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have * their ctime updated. 
*/ void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *newino = d_inode(new_dentry); inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir)); if (new_dir != old_dir) inode_set_mtime_to_ts(new_dir, inode_set_ctime_current(new_dir)); inode_set_ctime_current(d_inode(old_dentry)); if (newino) inode_set_ctime_current(newino); } EXPORT_SYMBOL_GPL(simple_rename_timestamp); int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { bool old_is_dir = d_is_dir(old_dentry); bool new_is_dir = d_is_dir(new_dentry); if (old_dir != new_dir && old_is_dir != new_is_dir) { if (old_is_dir) { drop_nlink(old_dir); inc_nlink(new_dir); } else { drop_nlink(new_dir); inc_nlink(old_dir); } } simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL_GPL(simple_rename_exchange); int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int they_are_dirs = d_is_dir(old_dentry); if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; if (flags & RENAME_EXCHANGE) return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); if (!simple_empty(new_dentry)) return -ENOTEMPTY; if (d_really_is_positive(new_dentry)) { simple_unlink(new_dir, new_dentry); if (they_are_dirs) { drop_nlink(d_inode(new_dentry)); drop_nlink(old_dir); } } else if (they_are_dirs) { drop_nlink(old_dir); inc_nlink(new_dir); } simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem * @idmap: idmap of the target mount * @dentry: dentry * @iattr: iattr structure * * Returns 0 on success, -error on failure. * * simple_setattr is a simple ->setattr implementation without a proper * implementation of size changes. * * It can either be used for in-memory filesystems or special files * on simple regular filesystems. Anything that needs to change on-disk * or wire state on size changes needs its own setattr method. 
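 *
 * A typical wiring for an in-memory filesystem (a sketch; the foo_*
 * name is hypothetical, simple_getattr is the libfs helper):
 *
 *	static const struct inode_operations foo_file_inode_operations = {
 *		.setattr	= simple_setattr,
 *		.getattr	= simple_getattr,
 *	};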
*/ int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int error; error = setattr_prepare(idmap, dentry, iattr); if (error) return error; if (iattr->ia_valid & ATTR_SIZE) truncate_setsize(inode, iattr->ia_size); setattr_copy(idmap, inode, iattr); mark_inode_dirty(inode); return 0; } EXPORT_SYMBOL(simple_setattr); static int simple_read_folio(struct file *file, struct folio *folio) { folio_zero_range(folio, 0, folio_size(folio)); flush_dcache_folio(folio); folio_mark_uptodate(folio); folio_unlock(folio); return 0; } int simple_write_begin(const struct kiocb *iocb, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { struct folio *folio; folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); *foliop = folio; if (!folio_test_uptodate(folio) && (len != folio_size(folio))) { size_t from = offset_in_folio(folio, pos); folio_zero_segments(folio, 0, from, from + len, folio_size(folio)); } return 0; } EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes * @iocb: kernel I/O control block * @mapping: " * @pos: " * @len: " * @copied: " * @folio: " * @fsdata: " * * simple_write_end does the minimum needed for updating a folio after * writing is done. It has the same API signature as the .write_end of * address_space_operations vector. So it can just be set onto .write_end for * FSes that don't need any other processing. i_rwsem is assumed to be held * exclusively. * Block based filesystems should use generic_write_end(). * NOTE: Even though i_size might get updated by this function, mark_inode_dirty * is not called, so a filesystem that actually does store data in .write_inode * should extend on what's done here with a call to mark_inode_dirty() in the * case that i_size has changed. * * Use *ONLY* with simple_read_folio() */ static int simple_write_end(const struct kiocb *iocb, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata) { struct inode *inode = folio->mapping->host; loff_t last_pos = pos + copied; /* zero the stale part of the folio if we did a short copy */ if (!folio_test_uptodate(folio)) { if (copied < len) { size_t from = offset_in_folio(folio, pos); folio_zero_range(folio, from + copied, len - copied); } folio_mark_uptodate(folio); } /* * No need to use i_size_read() here, the i_size * cannot change under us because we hold the i_rwsem. */ if (last_pos > inode->i_size) i_size_write(inode, last_pos); folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); return copied; } /* * Provides ramfs-style behavior: data in the pagecache, but no writeback. */ const struct address_space_operations ram_aops = { .read_folio = simple_read_folio, .write_begin = simple_write_begin, .write_end = simple_write_end, .dirty_folio = noop_dirty_folio, }; EXPORT_SYMBOL(ram_aops); /* * the inodes created here are not hashed. If you use iunique to generate * unique inode values later for this filesystem, then you must take care * to pass it an appropriate max_reserved value to avoid collisions. 
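 *
 * For example, if the static files below occupy inode numbers up to
 * ARRAY_SIZE(foo_files) - 1 (foo_files being a hypothetical tree_descr
 * array), dynamically created inodes could safely use:
 *
 *	inode->i_ino = iunique(sb, ARRAY_SIZE(foo_files));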
*/ int simple_fill_super(struct super_block *s, unsigned long magic, const struct tree_descr *files) { struct inode *inode; struct dentry *dentry; int i; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = magic; s->s_op = &simple_super_operations; s->s_time_gran = 1; inode = new_inode(s); if (!inode) return -ENOMEM; /* * because the root inode is 1, the files array must not contain an * entry at index 1 */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; simple_inode_init_ts(inode); inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); s->s_root = d_make_root(inode); if (!s->s_root) return -ENOMEM; for (i = 0; !files->name || files->name[0]; i++, files++) { if (!files->name) continue; /* warn if it tries to conflict with the root inode */ if (unlikely(i == 1)) printk(KERN_WARNING "%s: %s passed in a files array " "with an index of 1!\n", __func__, s->s_type->name); dentry = d_alloc_name(s->s_root, files->name); if (!dentry) return -ENOMEM; inode = new_inode(s); if (!inode) { dput(dentry); return -ENOMEM; } inode->i_mode = S_IFREG | files->mode; simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; d_make_persistent(dentry, inode); dput(dentry); } return 0; } EXPORT_SYMBOL(simple_fill_super); static DEFINE_SPINLOCK(pin_fs_lock); int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { struct vfsmount *mnt = NULL; spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); if (!*mount) *mount = mnt; } mntget(*mount); ++*count; spin_unlock(&pin_fs_lock); mntput(mnt); return 0; } EXPORT_SYMBOL(simple_pin_fs); void simple_release_fs(struct vfsmount **mount, int *count) { struct vfsmount *mnt; spin_lock(&pin_fs_lock); mnt = *mount; if (!--*count) *mount = NULL; spin_unlock(&pin_fs_lock); mntput(mnt); } EXPORT_SYMBOL(simple_release_fs); /** * simple_read_from_buffer - copy data from the buffer to user space * @to: the user space buffer to read to * @count: the maximum number of bytes to read * @ppos: the current position in the buffer * @from: the buffer to read from * @available: the size of the buffer * * The simple_read_from_buffer() function reads up to @count bytes from the * buffer @from at offset @ppos into the user space address starting at @to. * * On success, the number of bytes read is returned and the offset @ppos is * advanced by this number, or a negative value is returned on error. **/ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; size_t ret; if (pos < 0) return -EINVAL; if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; ret = copy_to_user(to, from + pos, count); if (ret == count) return -EFAULT; count -= ret; *ppos = pos + count; return count; } EXPORT_SYMBOL(simple_read_from_buffer); /** * simple_write_to_buffer - copy data from user space to the buffer * @to: the buffer to write to * @available: the size of the buffer * @ppos: the current position in the buffer * @from: the user space buffer to read from * @count: the maximum number of bytes to read * * The simple_write_to_buffer() function reads up to @count bytes from the user * space address starting at @from into the buffer @to at offset @ppos.
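 *
 * A typical write handler built on this helper (a sketch; foo_buf is a
 * hypothetical file-scope buffer):
 *
 *	static ssize_t foo_write(struct file *file, const char __user *ubuf,
 *				 size_t count, loff_t *ppos)
 *	{
 *		return simple_write_to_buffer(foo_buf, sizeof(foo_buf),
 *					      ppos, ubuf, count);
 *	}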
* * On success, the number of bytes written is returned and the offset @ppos is * advanced by this number, or a negative value is returned on error. **/ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count) { loff_t pos = *ppos; size_t res; if (pos < 0) return -EINVAL; if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; res = copy_from_user(to + pos, from, count); if (res == count) return -EFAULT; count -= res; *ppos = pos + count; return count; } EXPORT_SYMBOL(simple_write_to_buffer); /** * memory_read_from_buffer - copy data from the buffer * @to: the kernel space buffer to read to * @count: the maximum number of bytes to read * @ppos: the current position in the buffer * @from: the buffer to read from * @available: the size of the buffer * * The memory_read_from_buffer() function reads up to @count bytes from the * buffer @from at offset @ppos into the kernel space address starting at @to. * * On success, the number of bytes read is returned and the offset @ppos is * advanced by this number, or a negative value is returned on error. **/ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; if (pos < 0) return -EINVAL; if (pos >= available) return 0; if (count > available - pos) count = available - pos; memcpy(to, from + pos, count); *ppos = pos + count; return count; } EXPORT_SYMBOL(memory_read_from_buffer); /* * Transaction based IO. * The file expects a single write which triggers the transaction, and then * possibly a read which collects the result - which is stored in a * file-local buffer. */ void simple_transaction_set(struct file *file, size_t n) { struct simple_transaction_argresp *ar = file->private_data; BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); /* * The barrier ensures that ar->size will really remain zero until * ar->data is ready for reading.
*/ smp_mb(); ar->size = n; } EXPORT_SYMBOL(simple_transaction_set); char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) { struct simple_transaction_argresp *ar; static DEFINE_SPINLOCK(simple_transaction_lock); if (size > SIMPLE_TRANSACTION_LIMIT - 1) return ERR_PTR(-EFBIG); ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); if (!ar) return ERR_PTR(-ENOMEM); spin_lock(&simple_transaction_lock); /* only one write allowed per open */ if (file->private_data) { spin_unlock(&simple_transaction_lock); free_page((unsigned long)ar); return ERR_PTR(-EBUSY); } file->private_data = ar; spin_unlock(&simple_transaction_lock); if (copy_from_user(ar->data, buf, size)) return ERR_PTR(-EFAULT); return ar->data; } EXPORT_SYMBOL(simple_transaction_get); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { struct simple_transaction_argresp *ar = file->private_data; if (!ar) return 0; return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); } EXPORT_SYMBOL(simple_transaction_read); int simple_transaction_release(struct inode *inode, struct file *file) { free_page((unsigned long)file->private_data); return 0; } EXPORT_SYMBOL(simple_transaction_release); /* Simple attribute files */ struct simple_attr { int (*get)(void *, u64 *); int (*set)(void *, u64); char get_buf[24]; /* enough to store a u64 and "\n\0" */ char set_buf[24]; void *data; const char *fmt; /* format for read operation */ struct mutex mutex; /* protects access to these buffers */ }; /* simple_attr_open is called by an actual attribute open file operation * to set the attribute specific access operations. */ int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt) { struct simple_attr *attr; attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) return -ENOMEM; attr->get = get; attr->set = set; attr->data = inode->i_private; attr->fmt = fmt; mutex_init(&attr->mutex); file->private_data = attr; return nonseekable_open(inode, file); } EXPORT_SYMBOL_GPL(simple_attr_open); int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? 
*/ /* read from the buffer that is filled with the get function */ ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; size_t size; ssize_t ret; attr = file->private_data; if (!attr->get) return -EACCES; ret = mutex_lock_interruptible(&attr->mutex); if (ret) return ret; if (*ppos && attr->get_buf[0]) { /* continued read */ size = strlen(attr->get_buf); } else { /* first read */ u64 val; ret = attr->get(attr->data, &val); if (ret) goto out; size = scnprintf(attr->get_buf, sizeof(attr->get_buf), attr->fmt, (unsigned long long)val); } ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); out: mutex_unlock(&attr->mutex); return ret; } EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, size_t len, loff_t *ppos, bool is_signed) { struct simple_attr *attr; unsigned long long val; size_t size; ssize_t ret; attr = file->private_data; if (!attr->set) return -EACCES; ret = mutex_lock_interruptible(&attr->mutex); if (ret) return ret; ret = -EFAULT; size = min(sizeof(attr->set_buf) - 1, len); if (copy_from_user(attr->set_buf, buf, size)) goto out; attr->set_buf[size] = '\0'; if (is_signed) ret = kstrtoll(attr->set_buf, 0, &val); else ret = kstrtoull(attr->set_buf, 0, &val); if (ret) goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ out: mutex_unlock(&attr->mutex); return ret; } ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return simple_attr_write_xsigned(file, buf, len, ppos, false); } EXPORT_SYMBOL_GPL(simple_attr_write); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return simple_attr_write_xsigned(file, buf, len, ppos, true); } EXPORT_SYMBOL_GPL(simple_attr_write_signed); /** * generic_encode_ino32_fh - generic export_operations->encode_fh function * @inode: the object to encode * @fh: where to store the file handle fragment * @max_len: maximum length to store there (in 4 byte units) * @parent: parent directory inode, if wanted * * This generic encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. 
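 *
 * The resulting fragment layout, in 4-byte words (the parent fields are
 * present only for FILEID_INO32_GEN_PARENT):
 *
 *	fh[0] = i_ino			fh[1] = i_generation
 *	fh[2] = parent's i_ino		fh[3] = parent's i_generation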
*/ int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct fid *fid = (void *)fh; int len = *max_len; int type = FILEID_INO32_GEN; if (parent && (len < 4)) { *max_len = 4; return FILEID_INVALID; } else if (len < 2) { *max_len = 2; return FILEID_INVALID; } len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; if (parent) { fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; len = 4; type = FILEID_INO32_GEN_PARENT; } *max_len = len; return type; } EXPORT_SYMBOL_GPL(generic_encode_ino32_fh); /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes * @fh_type: type of file handle * @get_inode: filesystem callback to retrieve inode * * This function decodes @fid as long as it has one of the well-known * Linux filehandle types and calls @get_inode on it to retrieve the * inode for the object specified in the file handle. */ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)) { struct inode *inode = NULL; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_INO32_GEN: case FILEID_INO32_GEN_PARENT: inode = get_inode(sb, fid->i32.ino, fid->i32.gen); break; } return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_dentry); /** * generic_fh_to_parent - generic helper for the fh_to_parent export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes * @fh_type: type of file handle * @get_inode: filesystem callback to retrieve inode * * This function decodes @fid as long as it has one of the well-known * Linux filehandle types and calls @get_inode on it to retrieve the * inode for the _parent_ object specified in the file handle if it * is specified in the file handle, or NULL otherwise. */ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen)) { struct inode *inode = NULL; if (fh_len <= 2) return NULL; switch (fh_type) { case FILEID_INO32_GEN_PARENT: inode = get_inode(sb, fid->i32.parent_ino, (fh_len > 3 ? fid->i32.parent_gen : 0)); break; } return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_parent); /** * __generic_file_fsync - generic fsync implementation for simple filesystems * * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * * This is a generic implementation of the fsync method for simple * filesystems which track all non-inode metadata in the buffers list * hanging off the address_space structure. 
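 *
 * Such a filesystem would typically wire the flush-issuing wrapper
 * defined below into its file_operations (a sketch):
 *
 *	.fsync = generic_file_fsync,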
*/ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int err; int ret; err = file_write_and_wait_range(file, start, end); if (err) return err; inode_lock(inode); ret = sync_mapping_buffers(inode->i_mapping); if (!(inode_state_read_once(inode) & I_DIRTY_ALL)) goto out; if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) goto out; err = sync_inode_metadata(inode, 1); if (ret == 0) ret = err; out: inode_unlock(inode); /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) ret = err; return ret; } EXPORT_SYMBOL(__generic_file_fsync); /** * generic_file_fsync - generic fsync implementation for simple filesystems * with flush * @file: file to synchronize * @start: start offset in bytes * @end: end offset in bytes (inclusive) * @datasync: only synchronize essential metadata if true * */ int generic_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int err; err = __generic_file_fsync(file, start, end, datasync); if (err) return err; return blkdev_issue_flush(inode->i_sb->s_bdev); } EXPORT_SYMBOL(generic_file_fsync); /** * generic_check_addressable - Check addressability of file system * @blocksize_bits: log of file system block size * @num_blocks: number of blocks in file system * * Determine whether a file system with @num_blocks blocks (and a * block size of 2**@blocksize_bits) is addressable by the sector_t * and page cache of the system. Return 0 if so and -EFBIG otherwise. */ int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) { u64 last_fs_block = num_blocks - 1; u64 last_fs_page, max_bytes; if (check_shl_overflow(num_blocks, blocksize_bits, &max_bytes)) return -EFBIG; last_fs_page = (max_bytes >> PAGE_SHIFT) - 1; if (unlikely(num_blocks == 0)) return 0; if (blocksize_bits < 9) return -EINVAL; if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || (last_fs_page > (pgoff_t)(~0ULL))) { return -EFBIG; } return 0; } EXPORT_SYMBOL(generic_check_addressable); /* * No-op implementation of ->fsync for in-memory filesystems. */ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return 0; } EXPORT_SYMBOL(noop_fsync); ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { /* * iomap based filesystems support direct I/O without need for * this callback. However, it still needs to be set in * inode->a_ops so that open/fcntl know that direct I/O is * generally supported. */ return -EINVAL; } EXPORT_SYMBOL_GPL(noop_direct_IO); /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ void kfree_link(void *p) { kfree(p); } EXPORT_SYMBOL(kfree_link); struct inode *alloc_anon_inode(struct super_block *s) { static const struct address_space_operations anon_aops = { .dirty_folio = noop_dirty_folio, }; struct inode *inode = new_inode_pseudo(s); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &anon_aops; /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty * list because mark_inode_dirty() will think * that it already _is_ on the dirty list. */ inode_state_assign_raw(inode, I_DIRTY); /* * Historically anonymous inodes don't have a type at all and * userspace has come to rely on this. 
*/ inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE | S_ANON_INODE; simple_inode_init_ts(inode); return inode; } EXPORT_SYMBOL(alloc_anon_inode); /** * simple_get_link - generic helper to get the target of "fast" symlinks * @dentry: not used here * @inode: the symlink inode * @done: not used here * * Generic helper for filesystems to use for symlink inodes where a pointer to * the symlink target is stored in ->i_link. NOTE: this isn't normally called, * since as an optimization the path lookup code uses any non-NULL ->i_link * directly, without calling ->get_link(). But ->get_link() still must be set, * to mark the inode_operations as being for a symlink. * * Return: the symlink target */ const char *simple_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { return inode->i_link; } EXPORT_SYMBOL(simple_get_link); const struct inode_operations simple_symlink_inode_operations = { .get_link = simple_get_link, }; EXPORT_SYMBOL(simple_symlink_inode_operations); /* * Operations for a permanently empty directory. */ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return ERR_PTR(-ENOENT); } static int empty_dir_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EPERM; } static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) { return -EOPNOTSUPP; } static const struct inode_operations empty_dir_inode_operations = { .lookup = empty_dir_lookup, .setattr = empty_dir_setattr, .listxattr = empty_dir_listxattr, }; static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) { /* An empty directory has two entries . and .. at offsets 0 and 1 */ return generic_file_llseek_size(file, offset, whence, 2, 2); } static int empty_dir_readdir(struct file *file, struct dir_context *ctx) { dir_emit_dots(file, ctx); return 0; } static const struct file_operations empty_dir_operations = { .llseek = empty_dir_llseek, .read = generic_read_dir, .iterate_shared = empty_dir_readdir, .fsync = noop_fsync, }; void make_empty_dir_inode(struct inode *inode) { set_nlink(inode, 2); inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_rdev = 0; inode->i_size = 0; inode->i_blkbits = PAGE_SHIFT; inode->i_blocks = 0; inode->i_op = &empty_dir_inode_operations; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &empty_dir_operations; } bool is_empty_dir_inode(struct inode *inode) { return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } #if IS_ENABLED(CONFIG_UNICODE) /** * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems * @dentry: dentry whose name we are checking against * @len: len of name of dentry * @str: str pointer to name of dentry * @name: Name to compare against * * Return: 0 if names match, 1 if mismatch, or -ERRNO */ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { const struct dentry *parent; const struct inode *dir; union shortname_store strbuf; struct qstr qstr; /* * Attempt a case-sensitive match first. It is cheaper and * should cover most lookups, including all the sane * applications that expect a case-sensitive filesystem. * * This comparison is safe under RCU because the caller * guarantees the consistency between str and len. See * __d_lookup_rcu_op_compare() for details. 
*/ if (len == name->len && !memcmp(str, name->name, len)) return 0; parent = READ_ONCE(dentry->d_parent); dir = READ_ONCE(parent->d_inode); if (!dir || !IS_CASEFOLDED(dir)) return 1; qstr.len = len; qstr.name = str; /* * If the dentry name is stored in-line, then it may be concurrently * modified by a rename. If this happens, the VFS will eventually retry * the lookup, so it doesn't matter what ->d_compare() returns. * However, it's unsafe to call utf8_strncasecmp() with an unstable * string. Therefore, we have to copy the name into a temporary buffer. * As above, len is guaranteed to match str, so the shortname case * is exactly when str points to ->d_shortname. */ if (qstr.name == dentry->d_shortname.string) { strbuf = dentry->d_shortname; // NUL is guaranteed to be in there qstr.name = strbuf.string; /* prevent compiler from optimizing out the temporary buffer */ barrier(); } return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr); } EXPORT_SYMBOL(generic_ci_d_compare); /** * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems * @dentry: dentry of the parent directory * @str: qstr of name whose hash we should fill in * * Return: 0 if hash was successful or unchanged, and -EINVAL on error */ int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) { const struct inode *dir = READ_ONCE(dentry->d_inode); struct super_block *sb = dentry->d_sb; const struct unicode_map *um = sb->s_encoding; int ret; if (!dir || !IS_CASEFOLDED(dir)) return 0; ret = utf8_casefold_hash(um, dentry, str); if (ret < 0 && sb_has_strict_encoding(sb)) return -EINVAL; return 0; } EXPORT_SYMBOL(generic_ci_d_hash); static const struct dentry_operations generic_ci_dentry_ops = { .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, #ifdef CONFIG_FS_ENCRYPTION .d_revalidate = fscrypt_d_revalidate, #endif }; /** * generic_ci_match() - Match a name (case-insensitively) with a dirent. * This is a filesystem helper for comparison with directory entries. * generic_ci_d_compare should be used in VFS' ->d_compare instead. * * @parent: Inode of the parent of the dirent under comparison * @name: name under lookup. * @folded_name: Optional pre-folded name under lookup * @de_name: Dirent name. * @de_name_len: dirent name length. * * Test whether a case-insensitive directory entry matches the filename * being searched. If @folded_name is provided, it is used instead of * recalculating the casefold of @name. * * Return: > 0 if the directory entry matches, 0 if it doesn't match, or * < 0 on error. */ int generic_ci_match(const struct inode *parent, const struct qstr *name, const struct qstr *folded_name, const u8 *de_name, u32 de_name_len) { const struct super_block *sb = parent->i_sb; const struct unicode_map *um = sb->s_encoding; struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); struct qstr dirent = QSTR_INIT(de_name, de_name_len); int res = 0; if (IS_ENCRYPTED(parent)) { const struct fscrypt_str encrypted_name = FSTR_INIT((u8 *) de_name, de_name_len); if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent))) return -EINVAL; decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); if (!decrypted_name.name) return -ENOMEM; res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, &decrypted_name); if (res < 0) { kfree(decrypted_name.name); return res; } dirent.name = decrypted_name.name; dirent.len = decrypted_name.len; } /* * Attempt a case-sensitive match first. 
It is cheaper and * should cover most lookups, including all the sane * applications that expect a case-sensitive filesystem. */ if (dirent.len == name->len && !memcmp(name->name, dirent.name, dirent.len)) goto out; if (folded_name->name) res = utf8_strncasecmp_folded(um, folded_name, &dirent); else res = utf8_strncasecmp(um, name, &dirent); out: kfree(decrypted_name.name); if (res < 0 && sb_has_strict_encoding(sb)) { pr_err_ratelimited("Directory contains filename that is invalid UTF-8"); return 0; } return !res; } EXPORT_SYMBOL(generic_ci_match); #endif #ifdef CONFIG_FS_ENCRYPTION static const struct dentry_operations generic_encrypted_dentry_ops = { .d_revalidate = fscrypt_d_revalidate, }; #endif /** * generic_set_sb_d_ops - helper for choosing the set of * filesystem-wide dentry operations for the enabled features * @sb: superblock to be configured * * Filesystems supporting casefolding and/or fscrypt can call this * helper at mount-time to configure default dentry_operations to the * best set of dentry operations required for the enabled features. * The helper must be called after these have been configured, but * before the root dentry is created. */ void generic_set_sb_d_ops(struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) if (sb->s_encoding) { set_default_d_op(sb, &generic_ci_dentry_ops); return; } #endif #ifdef CONFIG_FS_ENCRYPTION if (sb->s_cop) { set_default_d_op(sb, &generic_encrypted_dentry_ops); return; } #endif } EXPORT_SYMBOL(generic_set_sb_d_ops); /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * * If "force" is set or the QUERIED flag is set, then ensure that we increment * the value, and clear the queried flag. * * In the common case where neither is set, then we can return "false" without * updating i_version. * * If this function returns false, and no other metadata has changed, then we * can avoid logging the metadata. */ bool inode_maybe_inc_iversion(struct inode *inode, bool force) { u64 cur, new; /* * The i_version field is not strictly ordered with any other inode * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * * We add a full memory barrier to ensure that any de facto ordering * with other state is preserved (either implicitly coming from cmpxchg * or explicitly from smp_mb if we don't know upfront if we will execute * the former). * * These barriers pair with inode_query_iversion(). */ cur = inode_peek_iversion_raw(inode); if (!force && !(cur & I_VERSION_QUERIED)) { smp_mb(); cur = inode_peek_iversion_raw(inode); } do { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) return false; /* Since lowest bit is flag, add 2 to avoid it */ new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return true; } EXPORT_SYMBOL(inode_maybe_inc_iversion); /** * inode_query_iversion - read i_version for later use * @inode: inode from which i_version should be read * * Read the inode i_version counter. This should be used by callers that wish * to store the returned i_version for later comparison. This will guarantee * that a later query of the i_version will result in a different value if * anything has changed. 
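 *
 * A typical caller pattern (a sketch):
 *
 *	u64 cookie = inode_query_iversion(inode);
 *	... some time later ...
 *	if (inode_cmp_iversion(inode, cookie))
 *		... the inode changed after it was sampled ...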
* * In this implementation, we fetch the current value, set the QUERIED flag and * then try to swap it into place with a cmpxchg, if it wasn't already set. If * that fails, we try again with the newly fetched value from the cmpxchg. */ u64 inode_query_iversion(struct inode *inode) { u64 cur, new; bool fenced = false; /* * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with * inode_maybe_inc_iversion(), see that routine for more details. */ cur = inode_peek_iversion_raw(inode); do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { if (!fenced) smp_mb(); break; } fenced = true; new = cur | I_VERSION_QUERIED; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; } EXPORT_SYMBOL(inode_query_iversion); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written) { struct address_space *mapping = iocb->ki_filp->f_mapping; loff_t pos = iocb->ki_pos - buffered_written; loff_t end = iocb->ki_pos - 1; int err; /* * If the buffered write fallback returned an error, we want to return * the number of bytes which were written by direct I/O, or the error * code if that was zero. * * Note that this differs from normal direct-io semantics, which will * return -EFOO even if some bytes were written. */ if (unlikely(buffered_written < 0)) { if (direct_written) return direct_written; return buffered_written; } /* * We need to ensure that the page cache pages are written to disk and * invalidated to preserve the expected O_DIRECT semantics. */ err = filemap_write_and_wait_range(mapping, pos, end); if (err < 0) { /* * We don't know how much we wrote, so just return the number of * bytes which were direct-written */ iocb->ki_pos -= buffered_written; if (direct_written) return direct_written; return err; } invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); return direct_written + buffered_written; } EXPORT_SYMBOL_GPL(direct_write_fallback); /** * simple_inode_init_ts - initialize the timestamps for a new inode * @inode: inode to be initialized * * When a new inode is created, most filesystems set the timestamps to the * current time. Add a helper to do this. */ struct timespec64 simple_inode_init_ts(struct inode *inode) { struct timespec64 ts = inode_set_ctime_current(inode); inode_set_atime_to_ts(inode, ts); inode_set_mtime_to_ts(inode, ts); return ts; } EXPORT_SYMBOL(simple_inode_init_ts); struct dentry *stashed_dentry_get(struct dentry **stashed) { struct dentry *dentry; guard(rcu)(); dentry = rcu_dereference(*stashed); if (!dentry) return NULL; if (IS_ERR(dentry)) return dentry; if (!lockref_get_not_dead(&dentry->d_lockref)) return NULL; return dentry; } static struct dentry *prepare_anon_dentry(struct dentry **stashed, struct super_block *sb, void *data) { struct dentry *dentry; struct inode *inode; const struct stashed_operations *sops = sb->s_fs_info; int ret; inode = new_inode_pseudo(sb); if (!inode) { sops->put_data(data); return ERR_PTR(-ENOMEM); } inode->i_flags |= S_IMMUTABLE; inode->i_mode = S_IFREG; simple_inode_init_ts(inode); ret = sops->init_inode(inode, data); if (ret < 0) { iput(inode); return ERR_PTR(ret); } /* Notice when this is changed. */ WARN_ON_ONCE(!S_ISREG(inode->i_mode)); dentry = d_alloc_anon(sb); if (!dentry) { iput(inode); return ERR_PTR(-ENOMEM); } /* Store address of location where dentry's supposed to be stashed. 
*/ dentry->d_fsdata = stashed; /* @data is now owned by the fs */ d_instantiate(dentry, inode); return dentry; } struct dentry *stash_dentry(struct dentry **stashed, struct dentry *dentry) { guard(rcu)(); for (;;) { struct dentry *old; /* Assume any old dentry was cleared out. */ old = cmpxchg(stashed, NULL, dentry); if (likely(!old)) return dentry; /* Check if somebody else installed a reusable dentry. */ if (lockref_get_not_dead(&old->d_lockref)) return old; /* There's an old dead dentry there, try to take it over. */ if (likely(try_cmpxchg(stashed, &old, dentry))) return dentry; } } /** * path_from_stashed - create path from stashed or new dentry * @stashed: where to retrieve or stash dentry * @mnt: mnt of the filesystem to use * @data: data to store in inode->i_private * @path: path to create * * The function tries to retrieve a stashed dentry from @stashed. If the dentry * is still valid then it will be reused. If the dentry can't be reused, the * function will allocate a new dentry and inode. It will then check again whether it * can reuse an existing dentry in case one has been added in the meantime or * update @stashed with the newly added dentry. * * Special-purpose helper for nsfs and pidfs. * * Return: Zero on success, or a negative error on failure. */ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path) { struct dentry *dentry, *res; const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info; /* See if dentry can be reused. */ res = stashed_dentry_get(stashed); if (IS_ERR(res)) return PTR_ERR(res); if (res) { sops->put_data(data); goto make_path; } /* Allocate a new dentry. */ dentry = prepare_anon_dentry(stashed, mnt->mnt_sb, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); /* Added a new dentry. @data is now owned by the filesystem. */ if (sops->stash_dentry) res = sops->stash_dentry(stashed, dentry); else res = stash_dentry(stashed, dentry); if (IS_ERR(res)) { dput(dentry); return PTR_ERR(res); } if (res != dentry) dput(dentry); make_path: path->dentry = res; path->mnt = mntget(mnt); VFS_WARN_ON_ONCE(path->dentry->d_fsdata != stashed); VFS_WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); return 0; } void stashed_dentry_prune(struct dentry *dentry) { struct dentry **stashed = dentry->d_fsdata; struct inode *inode = d_inode(dentry); if (WARN_ON_ONCE(!stashed)) return; if (!inode) return; /* * Only replace our own @dentry as someone else might've * already cleared out @dentry and stashed their own * dentry in there. */ cmpxchg(stashed, dentry, NULL); } /** * simple_start_creating - prepare to create a given name * @parent: directory in which to prepare to create the name * @name: the name to be created * * The required lock is taken and a lookup is performed prior to creating an * object in a directory. No permission checking is performed. * * Returns: a negative dentry on which vfs_create() or similar may * be attempted, or an error. */ struct dentry *simple_start_creating(struct dentry *parent, const char *name) { struct qstr qname = QSTR(name); int err; err = lookup_noperm_common(&qname, parent); if (err) return ERR_PTR(err); return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL); } EXPORT_SYMBOL(simple_start_creating); /* parent must have been held exclusive since simple_start_creating() */ void simple_done_creating(struct dentry *child) { inode_unlock(child->d_parent->d_inode); dput(child); } EXPORT_SYMBOL(simple_done_creating);
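/*
 * Example: a minimal fill_super built on the helpers above (a sketch;
 * the foo_* names and FOO_MAGIC are hypothetical). Because the root
 * inode is number 1 and each file's inode number is its array index,
 * real entries start at index 2 and the array ends with an empty name.
 */
static const struct tree_descr foo_files[] = {
	[2] = { "status", &foo_status_fops, 0444 },
	[3] = { "control", &foo_control_fops, 0200 },
	{ "" }	/* terminator: name[0] == '\0' stops the copy loop */
};

static int foo_fill_super(struct super_block *sb, struct fs_context *fc)
{
	return simple_fill_super(sb, FOO_MAGIC, foo_files);
}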
// SPDX-License-Identifier: GPL-2.0-only /* * VMware VMCI Driver * * Copyright (C) 2012 VMware, Inc. All rights reserved. */ #include <linux/vmw_vmci_defs.h> #include <linux/vmw_vmci_api.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include "vmci_driver.h" #include "vmci_event.h" static bool vmci_disable_host; module_param_named(disable_host, vmci_disable_host, bool, 0); MODULE_PARM_DESC(disable_host, "Disable driver host personality (default=enabled)"); static bool vmci_disable_guest; module_param_named(disable_guest, vmci_disable_guest, bool, 0); MODULE_PARM_DESC(disable_guest, "Disable driver guest personality (default=enabled)"); static bool vmci_guest_personality_initialized; static bool vmci_host_personality_initialized; static DEFINE_MUTEX(vmci_vsock_mutex); /* protects vmci_vsock_transport_cb */ static vmci_vsock_cb vmci_vsock_transport_cb; static bool vmci_vsock_cb_host_called; /* * vmci_get_context_id() - Gets the current context ID. * * Returns the current context ID: the VM's context ID when the guest * personality is active, the well-known host context ID when only the * host personality is active, or VMCI_INVALID_ID if neither is. */ u32 vmci_get_context_id(void) { if (vmci_guest_code_active()) return vmci_get_vm_context_id(); else if (vmci_host_code_active()) return VMCI_HOST_CONTEXT_ID; return VMCI_INVALID_ID; } EXPORT_SYMBOL_GPL(vmci_get_context_id); /* * vmci_register_vsock_callback() - Register the VSOCK vmci_transport callback. * * The callback will be called when the first host or guest becomes active, * or if they are already active when this function is called. * To unregister the callback, call this function with a NULL parameter. * * Returns 0 on success. -EBUSY if a callback is already registered. */ int vmci_register_vsock_callback(vmci_vsock_cb callback) { int err = 0; mutex_lock(&vmci_vsock_mutex); if (vmci_vsock_transport_cb && callback) { err = -EBUSY; goto out; } vmci_vsock_transport_cb = callback; if (!vmci_vsock_transport_cb) { vmci_vsock_cb_host_called = false; goto out; } if (vmci_guest_code_active()) vmci_vsock_transport_cb(false); if (vmci_host_users() > 0) { vmci_vsock_cb_host_called = true; vmci_vsock_transport_cb(true); } out: mutex_unlock(&vmci_vsock_mutex); return err; } EXPORT_SYMBOL_GPL(vmci_register_vsock_callback); void vmci_call_vsock_callback(bool is_host) { mutex_lock(&vmci_vsock_mutex); if (!vmci_vsock_transport_cb) goto out; /* In the host, this function could be called multiple times, * but we want to register it only once.
*/ if (is_host) { if (vmci_vsock_cb_host_called) goto out; vmci_vsock_cb_host_called = true; } vmci_vsock_transport_cb(is_host); out: mutex_unlock(&vmci_vsock_mutex); } static int __init vmci_drv_init(void) { int vmci_err; int error; vmci_err = vmci_event_init(); if (vmci_err < VMCI_SUCCESS) { pr_err("Failed to initialize VMCIEvent (result=%d)\n", vmci_err); return -EINVAL; } if (!vmci_disable_guest) { error = vmci_guest_init(); if (error) { pr_warn("Failed to initialize guest personality (err=%d)\n", error); } else { vmci_guest_personality_initialized = true; pr_info("Guest personality initialized and is %s\n", vmci_guest_code_active() ? "active" : "inactive"); } } if (!vmci_disable_host) { error = vmci_host_init(); if (error) { pr_warn("Unable to initialize host personality (err=%d)\n", error); } else { vmci_host_personality_initialized = true; pr_info("Initialized host personality\n"); } } if (!vmci_guest_personality_initialized && !vmci_host_personality_initialized) { vmci_event_exit(); return -ENODEV; } return 0; } module_init(vmci_drv_init); static void __exit vmci_drv_exit(void) { if (vmci_guest_personality_initialized) vmci_guest_exit(); if (vmci_host_personality_initialized) vmci_host_exit(); vmci_event_exit(); } module_exit(vmci_drv_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); MODULE_VERSION("1.1.6.0-k"); MODULE_LICENSE("GPL v2");
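/*
 * Example: registering a transport activation callback against the API
 * above (a sketch; the my_* names are hypothetical). Note that
 * vmci_register_vsock_callback() invokes the callback immediately for
 * any personality that is already active, and that passing NULL
 * unregisters it.
 */
static void my_vsock_cb(bool is_host)
{
	pr_info("VMCI %s personality is active\n", is_host ? "host" : "guest");
}

static int __init my_transport_init(void)
{
	return vmci_register_vsock_callback(my_vsock_cb);
}

static void __exit my_transport_exit(void)
{
	vmci_register_vsock_callback(NULL);
}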
// SPDX-License-Identifier: GPL-2.0+ /* * comedi/drivers/ni_usb6501.c * Comedi driver for National Instruments USB-6501 * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2014 Luca Ellero <luca.ellero@brickedbrain.com> */ /* * Driver: ni_usb6501 * Description: National Instruments USB-6501 module * Devices: [National Instruments] USB-6501 (ni_usb6501) * Author: Luca Ellero <luca.ellero@brickedbrain.com> * Updated: 8 Sep 2014 * Status: works * * * Configuration Options: * none */ /* * NI-6501 - USB PROTOCOL DESCRIPTION * * Every command is composed of two USB packets: * - request (out) * - response (in) * * Every packet is at least 12 bytes long; here is the meaning of * every field (all values are hex): * * byte 0 is always 00 * byte 1 is always 01 * byte 2 is always 00 * byte 3 is the total packet length * * byte 4 is always 00 * byte 5 is the total packet length - 4 * byte 6 is always 01 * byte 7 is the command * * byte 8 is 02 (request) or 00 (response) * byte 9 is 00 (response) or 10 (port request) or 20 (counter request) * byte 10 is always 00 * byte 11 is 00 (request) or 02 (response) * * PORT PACKETS * * CMD: 0xE READ_PORT * REQ: 00 01 00
10 00 0C 01 0E 02 10 00 00 00 03 <PORT> 00 * RES: 00 01 00 10 00 0C 01 00 00 00 00 02 00 03 <BMAP> 00 * * CMD: 0xF WRITE_PORT * REQ: 00 01 00 14 00 10 01 0F 02 10 00 00 00 03 <PORT> 00 03 <BMAP> 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD: 0x12 SET_PORT_DIR (0 = input, 1 = output) * REQ: 00 01 00 18 00 14 01 12 02 10 00 00 * 00 05 <PORT 0> <PORT 1> <PORT 2> 00 05 00 00 00 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * COUNTER PACKETS * * CMD 0x9: START_COUNTER * REQ: 00 01 00 0C 00 08 01 09 02 20 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD 0xC: STOP_COUNTER * REQ: 00 01 00 0C 00 08 01 0C 02 20 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD 0xE: READ_COUNTER * REQ: 00 01 00 0C 00 08 01 0E 02 20 00 00 * RES: 00 01 00 10 00 0C 01 00 00 00 00 02 <u32 counter value, Big Endian> * * CMD 0xF: WRITE_COUNTER * REQ: 00 01 00 10 00 0C 01 0F 02 20 00 00 <u32 counter value, Big Endian> * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * * Please visit https://www.brickedbrain.com if you need * additional information or have any questions. * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/comedi/comedi_usb.h> #define NI6501_TIMEOUT 1000 /* Port request packets */ static const u8 READ_PORT_REQUEST[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x0E, 0x02, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00}; static const u8 WRITE_PORT_REQUEST[] = {0x00, 0x01, 0x00, 0x14, 0x00, 0x10, 0x01, 0x0F, 0x02, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00}; static const u8 SET_PORT_DIR_REQUEST[] = {0x00, 0x01, 0x00, 0x18, 0x00, 0x14, 0x01, 0x12, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00}; /* Counter request packets */ static const u8 START_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x09, 0x02, 0x20, 0x00, 0x00}; static const u8 STOP_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x0C, 0x02, 0x20, 0x00, 0x00}; static const u8 READ_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x0E, 0x02, 0x20, 0x00, 0x00}; static const u8 WRITE_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x0F, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; /* Response packets */ static const u8 GENERIC_RESPONSE[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02}; static const u8 READ_PORT_RESPONSE[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00}; static const u8 READ_COUNTER_RESPONSE[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; /* Largest supported packets */ static const size_t TX_MAX_SIZE = sizeof(SET_PORT_DIR_REQUEST); static const size_t RX_MAX_SIZE = sizeof(READ_PORT_RESPONSE); enum commands { READ_PORT, WRITE_PORT, SET_PORT_DIR, START_COUNTER, STOP_COUNTER, READ_COUNTER, WRITE_COUNTER }; struct ni6501_private { struct usb_endpoint_descriptor *ep_rx; struct usb_endpoint_descriptor *ep_tx; struct mutex mut; u8 *usb_rx_buf; u8 *usb_tx_buf; }; static int ni6501_port_command(struct comedi_device *dev, int command, unsigned int val, u8 *bitmap) { struct usb_device *usb = comedi_to_usb_dev(dev); struct ni6501_private *devpriv = dev->private; int request_size, response_size; u8 *tx = devpriv->usb_tx_buf; int ret; if (command != SET_PORT_DIR && !bitmap) return -EINVAL; mutex_lock(&devpriv->mut); switch (command) { case READ_PORT: request_size = sizeof(READ_PORT_REQUEST); response_size = 
sizeof(READ_PORT_RESPONSE); memcpy(tx, READ_PORT_REQUEST, request_size); tx[14] = val & 0xff; break; case WRITE_PORT: request_size = sizeof(WRITE_PORT_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, WRITE_PORT_REQUEST, request_size); tx[14] = val & 0xff; tx[17] = *bitmap; break; case SET_PORT_DIR: request_size = sizeof(SET_PORT_DIR_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, SET_PORT_DIR_REQUEST, request_size); tx[14] = val & 0xff; tx[15] = (val >> 8) & 0xff; tx[16] = (val >> 16) & 0xff; break; default: ret = -EINVAL; goto end; } ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->ep_tx->bEndpointAddress), devpriv->usb_tx_buf, request_size, NULL, NI6501_TIMEOUT); if (ret) goto end; ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->ep_rx->bEndpointAddress), devpriv->usb_rx_buf, response_size, NULL, NI6501_TIMEOUT); if (ret) goto end; /* Check if results are valid */ if (command == READ_PORT) { *bitmap = devpriv->usb_rx_buf[14]; /* mask bitmap for comparing */ devpriv->usb_rx_buf[14] = 0x00; if (memcmp(devpriv->usb_rx_buf, READ_PORT_RESPONSE, sizeof(READ_PORT_RESPONSE))) { ret = -EINVAL; } } else if (memcmp(devpriv->usb_rx_buf, GENERIC_RESPONSE, sizeof(GENERIC_RESPONSE))) { ret = -EINVAL; } end: mutex_unlock(&devpriv->mut); return ret; } static int ni6501_counter_command(struct comedi_device *dev, int command, u32 *val) { struct usb_device *usb = comedi_to_usb_dev(dev); struct ni6501_private *devpriv = dev->private; int request_size, response_size; u8 *tx = devpriv->usb_tx_buf; int ret; if ((command == READ_COUNTER || command == WRITE_COUNTER) && !val) return -EINVAL; mutex_lock(&devpriv->mut); switch (command) { case START_COUNTER: request_size = sizeof(START_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, START_COUNTER_REQUEST, request_size); break; case STOP_COUNTER: request_size = sizeof(STOP_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, STOP_COUNTER_REQUEST, request_size); break; case READ_COUNTER: request_size = sizeof(READ_COUNTER_REQUEST); response_size = sizeof(READ_COUNTER_RESPONSE); memcpy(tx, READ_COUNTER_REQUEST, request_size); break; case WRITE_COUNTER: request_size = sizeof(WRITE_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, WRITE_COUNTER_REQUEST, request_size); /* Setup tx packet: bytes 12,13,14,15 hold the */ /* u32 counter value (Big Endian) */ *((__be32 *)&tx[12]) = cpu_to_be32(*val); break; default: ret = -EINVAL; goto end; } ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->ep_tx->bEndpointAddress), devpriv->usb_tx_buf, request_size, NULL, NI6501_TIMEOUT); if (ret) goto end; ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->ep_rx->bEndpointAddress), devpriv->usb_rx_buf, response_size, NULL, NI6501_TIMEOUT); if (ret) goto end; /* Check if results are valid */ if (command == READ_COUNTER) { int i; /* Read counter value: bytes 12,13,14,15 of rx packet */ /* hold the u32 counter value (Big Endian) */ *val = be32_to_cpu(*((__be32 *)&devpriv->usb_rx_buf[12])); /* mask counter value for comparing */ for (i = 12; i < sizeof(READ_COUNTER_RESPONSE); ++i) devpriv->usb_rx_buf[i] = 0x00; if (memcmp(devpriv->usb_rx_buf, READ_COUNTER_RESPONSE, sizeof(READ_COUNTER_RESPONSE))) { ret = -EINVAL; } } else if (memcmp(devpriv->usb_rx_buf, GENERIC_RESPONSE, sizeof(GENERIC_RESPONSE))) { ret = -EINVAL; } end: mutex_unlock(&devpriv->mut); return ret; } static int ni6501_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int 
*data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; ret = ni6501_port_command(dev, SET_PORT_DIR, s->io_bits, NULL); if (ret) return ret; return insn->n; } static int ni6501_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int mask; int ret; u8 port; u8 bitmap; mask = comedi_dio_update_state(s, data); for (port = 0; port < 3; port++) { if (mask & (0xFF << port * 8)) { bitmap = (s->state >> port * 8) & 0xFF; ret = ni6501_port_command(dev, WRITE_PORT, port, &bitmap); if (ret) return ret; } } data[1] = 0; for (port = 0; port < 3; port++) { ret = ni6501_port_command(dev, READ_PORT, port, &bitmap); if (ret) return ret; data[1] |= bitmap << port * 8; } return insn->n; } static int ni6501_cnt_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; u32 val = 0; switch (data[0]) { case INSN_CONFIG_ARM: ret = ni6501_counter_command(dev, START_COUNTER, NULL); break; case INSN_CONFIG_DISARM: ret = ni6501_counter_command(dev, STOP_COUNTER, NULL); break; case INSN_CONFIG_RESET: ret = ni6501_counter_command(dev, STOP_COUNTER, NULL); if (ret) break; ret = ni6501_counter_command(dev, WRITE_COUNTER, &val); break; default: return -EINVAL; } return ret ? ret : insn->n; } static int ni6501_cnt_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; u32 val; unsigned int i; for (i = 0; i < insn->n; i++) { ret = ni6501_counter_command(dev, READ_COUNTER, &val); if (ret) return ret; data[i] = val; } return insn->n; } static int ni6501_cnt_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; if (insn->n) { u32 val = data[insn->n - 1]; ret = ni6501_counter_command(dev, WRITE_COUNTER, &val); if (ret) return ret; } return insn->n; } static int ni6501_alloc_usb_buffers(struct comedi_device *dev) { struct ni6501_private *devpriv = dev->private; size_t size; size = usb_endpoint_maxp(devpriv->ep_rx); devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_rx_buf) return -ENOMEM; size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; return 0; } static int ni6501_find_endpoints(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv = dev->private; struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *ep_desc; int i; if (iface_desc->desc.bNumEndpoints != 2) { dev_err(dev->class_dev, "Wrong number of endpoints\n"); return -ENODEV; } for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { ep_desc = &iface_desc->endpoint[i].desc; if (usb_endpoint_is_bulk_in(ep_desc)) { if (!devpriv->ep_rx) devpriv->ep_rx = ep_desc; continue; } if (usb_endpoint_is_bulk_out(ep_desc)) { if (!devpriv->ep_tx) devpriv->ep_tx = ep_desc; continue; } } if (!devpriv->ep_rx || !devpriv->ep_tx) return -ENODEV; if (usb_endpoint_maxp(devpriv->ep_rx) < RX_MAX_SIZE) return -ENODEV; if (usb_endpoint_maxp(devpriv->ep_tx) < TX_MAX_SIZE) return -ENODEV; return 0; } static int ni6501_auto_attach(struct comedi_device *dev, unsigned long context) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv; struct comedi_subdevice *s; int ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; 
mutex_init(&devpriv->mut); usb_set_intfdata(intf, devpriv); ret = ni6501_find_endpoints(dev); if (ret) return ret; ret = ni6501_alloc_usb_buffers(dev); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; /* Digital Input/Output subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = ni6501_dio_insn_bits; s->insn_config = ni6501_dio_insn_config; /* Counter subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_LSAMPL; s->n_chan = 1; s->maxdata = 0xffffffff; s->insn_read = ni6501_cnt_insn_read; s->insn_write = ni6501_cnt_insn_write; s->insn_config = ni6501_cnt_insn_config; return 0; } static void ni6501_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv = dev->private; if (!devpriv) return; mutex_destroy(&devpriv->mut); usb_set_intfdata(intf, NULL); kfree(devpriv->usb_rx_buf); kfree(devpriv->usb_tx_buf); } static struct comedi_driver ni6501_driver = { .module = THIS_MODULE, .driver_name = "ni6501", .auto_attach = ni6501_auto_attach, .detach = ni6501_detach, }; static int ni6501_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &ni6501_driver, id->driver_info); } static const struct usb_device_id ni6501_usb_table[] = { { USB_DEVICE(0x3923, 0x718a) }, { } }; MODULE_DEVICE_TABLE(usb, ni6501_usb_table); static struct usb_driver ni6501_usb_driver = { .name = "ni6501", .id_table = ni6501_usb_table, .probe = ni6501_usb_probe, .disconnect = comedi_usb_auto_unconfig, }; module_comedi_usb_driver(ni6501_driver, ni6501_usb_driver); MODULE_AUTHOR("Luca Ellero"); MODULE_DESCRIPTION("Comedi driver for National Instruments USB-6501"); MODULE_LICENSE("GPL");
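
As a usage note for the driver above: user-space programs talk to the board through the generic comedi interface, not the raw USB packets documented in the header comment. A minimal sketch, assuming comedilib is installed and the board was auto-configured as /dev/comedi0 (per the attach code, subdevice 0 is the 24-channel DIO subdevice, subdevice 1 the counter); the device path is an assumption:

#include <stdio.h>
#include <comedilib.h>

int main(void)
{
	/* "/dev/comedi0" is an assumption; adjust to the actual node */
	comedi_t *dev = comedi_open("/dev/comedi0");
	unsigned int bit = 0;

	if (!dev) {
		comedi_perror("comedi_open");
		return 1;
	}
	/* configure channel 0 of the DIO subdevice as output, raise it,
	 * then read the pin state back
	 */
	if (comedi_dio_config(dev, 0, 0, COMEDI_OUTPUT) < 0 ||
	    comedi_dio_write(dev, 0, 0, 1) < 0 ||
	    comedi_dio_read(dev, 0, 0, &bit) < 0) {
		comedi_perror("dio");
		comedi_close(dev);
		return 1;
	}
	printf("channel 0 reads back %u\n", bit);
	comedi_close(dev);
	return 0;
}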
/*
 * INETPEER - A storage for permanent information about peers
 *
 * This source is covered by the GNU GPL, the same as all kernel sources.
 *
 * Authors: Andrey V. Savochkin <saw@msu.ru>
 */

#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/timer.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/workqueue.h>
#include <net/ip.h>
#include <net/inetpeer.h>
#include <net/secure_seq.h>

/*
 * Theory of operations.
 * We keep one entry for each peer IP address.  Each node contains long-living
 * information about the peer which doesn't depend on routes.
 *
 * Nodes are removed only when their reference counter goes to 0.
 * Once that happens, the node may be removed after a sufficient amount of
 * time has passed since its last use.  The less-recently-used entry can
 * also be removed if the pool is overloaded, i.e. if the total number of
 * entries is greater than or equal to the threshold.
 *
 * The node pool is organised as an RB tree.
 * Such an implementation has been chosen not just for fun.  It's a way to
 * prevent easy and efficient DoS attacks by creating hash collisions.  A huge
 * number of long-living nodes in a single hash slot would significantly delay
 * lookups performed with disabled BHs.
 *
 * Serialisation issues.
 * 1.  Nodes may appear in the tree only with the pool lock held.
 * 2.  Nodes may disappear from the tree only with the pool lock held
 *     AND the reference count being 0.
 * 3.  Global variable peer_total is modified under the pool lock.
 * 4.  struct inet_peer fields modification:
 *     rb_node: pool lock
 *     refcnt: atomically against modifications on other CPUs,
 *             usually under some other lock to prevent node disappearing
 *     daddr: unchangeable
 */

static struct kmem_cache *peer_cachep __ro_after_init;

void inet_peer_base_init(struct inet_peer_base *bp)
{
	bp->rb_root = RB_ROOT;
	seqlock_init(&bp->lock);
	bp->total = 0;
}
EXPORT_IPV6_MOD_GPL(inet_peer_base_init);

#define PEER_MAX_GC 32

/* Exported for sysctl_net_ipv4. */
int inet_peer_threshold __read_mostly;	/* start to throw entries more
					 * aggressively at this stage
					 */
int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */

/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
{
	u64 nr_entries;

	/* 1% of physical memory */
	nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT,
			      100 * L1_CACHE_ALIGN(sizeof(struct inet_peer)));

	inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);

	peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}

/* Called with rcu_read_lock() or base->lock held */
static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
				struct inet_peer_base *base,
				unsigned int seq,
				struct inet_peer *gc_stack[],
				unsigned int *gc_cnt,
				struct rb_node **parent_p,
				struct rb_node ***pp_p)
{
	struct rb_node **pp, *parent, *next;
	struct inet_peer *p;
	u32 now;

	pp = &base->rb_root.rb_node;
	parent = NULL;
	while (1) {
		int cmp;

		next = rcu_dereference_raw(*pp);
		if (!next)
			break;
		parent = next;
		p = rb_entry(parent, struct inet_peer, rb_node);
		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
		if (cmp == 0) {
			now = jiffies;
			if (READ_ONCE(p->dtime) != now)
				WRITE_ONCE(p->dtime, now);
			return p;
		}
		if (gc_stack) {
			if (*gc_cnt < PEER_MAX_GC)
				gc_stack[(*gc_cnt)++] = p;
		} else if (unlikely(read_seqretry(&base->lock, seq))) {
			break;
		}
		if (cmp == -1)
			pp = &next->rb_left;
		else
			pp = &next->rb_right;
	}
	*parent_p = parent;
	*pp_p = pp;
	return NULL;
}

/* perform garbage collection on all items stacked during a lookup */
static void inet_peer_gc(struct inet_peer_base *base,
			 struct inet_peer *gc_stack[],
			 unsigned int gc_cnt)
{
	int peer_threshold, peer_maxttl, peer_minttl;
	struct inet_peer *p;
	__u32 delta, ttl;
	int i;

	peer_threshold = READ_ONCE(inet_peer_threshold);
	peer_maxttl = READ_ONCE(inet_peer_maxttl);
	peer_minttl = READ_ONCE(inet_peer_minttl);

	if (base->total >= peer_threshold)
		ttl = 0; /* be aggressive */
	else
		ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
			base->total / peer_threshold * HZ;

	for (i = 0; i < gc_cnt; i++) {
		p = gc_stack[i];
		delta = (__u32)jiffies - READ_ONCE(p->dtime);
		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
			gc_stack[i] = NULL;
	}
	for (i = 0; i < gc_cnt; i++) {
		p = gc_stack[i];
		if (p) {
			rb_erase(&p->rb_node, &base->rb_root);
			base->total--;
			kfree_rcu(p, rcu);
		}
	}
}

/* Must be called under RCU: no refcount change is done here. */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
			       const struct inetpeer_addr *daddr)
{
	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
	struct rb_node **pp, *parent;
	unsigned int gc_cnt, seq;

	/* Attempt a lockless lookup first.
	 * Because of a concurrent writer, we might not find an existing entry.
	 */
	seq = read_seqbegin(&base->lock);
	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);

	if (p)
		return p;

	/* retry an exact lookup, taking the lock before.
	 * At least, nodes should be hot in our cache.
	 */
	parent = NULL;
	write_seqlock_bh(&base->lock);

	gc_cnt = 0;
	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
	if (!p) {
		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
		if (p) {
			p->daddr = *daddr;
			p->dtime = (__u32)jiffies;
			refcount_set(&p->refcnt, 1);
			atomic_set(&p->rid, 0);
			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
			p->rate_tokens = 0;
			p->n_redirects = 0;
			/* 60*HZ is arbitrary, but chosen high enough so that
			 * the first calculation of tokens is at its maximum.
			 */
			p->rate_last = jiffies - 60*HZ;

			rb_link_node(&p->rb_node, parent, pp);
			rb_insert_color(&p->rb_node, &base->rb_root);
			base->total++;
		}
	}
	if (gc_cnt)
		inet_peer_gc(base, gc_stack, gc_cnt);
	write_sequnlock_bh(&base->lock);

	return p;
}
EXPORT_IPV6_MOD_GPL(inet_getpeer);

void inet_putpeer(struct inet_peer *p)
{
	if (refcount_dec_and_test(&p->refcnt))
		kfree_rcu(p, rcu);
}

/*
 *	Check transmit rate limitation for a given message.
 *	The rate information is held in the inet_peer entries now.
 *	This function is generic and could be used for other purposes
 *	too. It uses a token bucket filter as suggested by Alexey Kuznetsov.
 *
 *	Note that the same inet_peer fields are modified by functions in
 *	route.c too, but these work for packet destinations while xrlim_allow
 *	works for icmp destinations. This means the rate limiting information
 *	for one "ip object" is shared - and these ICMPs are twice limited:
 *	by source and by destination.
 *
 *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate
 *			  SHOULD allow setting of rate limits
 *
 *	Shared between ICMPv4 and ICMPv6.
 */
#define XRLIM_BURST_FACTOR 6
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
{
	unsigned long now, token, otoken, delta;
	bool rc = false;

	if (!peer)
		return true;

	token = otoken = READ_ONCE(peer->rate_tokens);
	now = jiffies;
	delta = now - READ_ONCE(peer->rate_last);
	if (delta) {
		WRITE_ONCE(peer->rate_last, now);
		token += delta;
		if (token > XRLIM_BURST_FACTOR * timeout)
			token = XRLIM_BURST_FACTOR * timeout;
	}
	if (token >= timeout) {
		token -= timeout;
		rc = true;
	}
	if (token != otoken)
		WRITE_ONCE(peer->rate_tokens, token);
	return rc;
}
EXPORT_IPV6_MOD(inet_peer_xrlim_allow);

void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
	struct rb_node *p = rb_first(&base->rb_root);

	while (p) {
		struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);

		p = rb_next(p);
		rb_erase(&peer->rb_node, &base->rb_root);
		inet_putpeer(peer);
		cond_resched();
	}

	base->total = 0;
}
EXPORT_IPV6_MOD(inetpeer_invalidate_tree);
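
The token-bucket arithmetic in inet_peer_xrlim_allow() is compact enough to model outside the kernel. Below is a standalone sketch of the same logic (plain C, jiffies replaced by an abstract clock, READ_ONCE/WRITE_ONCE dropped; the names xrlim_allow_model and peer_model are illustrative, not kernel API) that shows the burst-then-steady-rate behaviour:

#include <stdbool.h>
#include <stdio.h>

#define XRLIM_BURST_FACTOR 6

struct peer_model { unsigned long rate_tokens, rate_last; };

/* Same bucket rules as inet_peer_xrlim_allow(): tokens grow with the
 * elapsed time, are clamped to XRLIM_BURST_FACTOR * timeout, and each
 * allowed message costs `timeout` tokens, so the steady-state rate is
 * one message per timeout interval with bursts of up to six.
 */
static bool xrlim_allow_model(struct peer_model *p, unsigned long now,
			      unsigned long timeout)
{
	unsigned long token = p->rate_tokens + (now - p->rate_last);

	p->rate_last = now;
	if (token > XRLIM_BURST_FACTOR * timeout)
		token = XRLIM_BURST_FACTOR * timeout;
	if (token < timeout) {
		p->rate_tokens = token;
		return false;		/* rate limited */
	}
	p->rate_tokens = token - timeout;
	return true;			/* message allowed */
}

int main(void)
{
	struct peer_model p = { 6000, 0 };	/* full bucket, timeout 1000 */
	unsigned long t;

	/* the first 6 back-to-back messages pass (the burst), then the
	 * bucket is drained and further calls are denied until tokens
	 * rebuild at one message per 1000 ticks
	 */
	for (t = 0; t < 8; t++)
		printf("t=%lu -> %d\n", t, xrlim_allow_model(&p, t, 1000));
	return 0;
}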
// SPDX-License-Identifier: GPL-2.0+
/*
 * HID driver for Topre REALFORCE Keyboards
 *
 * Copyright (c) 2022 Harry Stern <harry@harrystern.net>
 *
 * Based on the hid-macally driver
 */

#include <linux/hid.h>
#include <linux/module.h>

#include "hid-ids.h"

MODULE_AUTHOR("Harry Stern <harry@harrystern.net>");
MODULE_DESCRIPTION("REALFORCE R2 Keyboard driver");
MODULE_LICENSE("GPL");

/*
 * Fix the REALFORCE R2's non-boot interface's report descriptor to match the
 * events it's actually sending. It claims to send array events but is instead
 * sending variable events.
 */
static const __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				      unsigned int *rsize)
{
	if (*rsize >= 119 && rdesc[69] == 0x29 && rdesc[70] == 0xe7 &&
	    rdesc[71] == 0x81 && rdesc[72] == 0x00) {
		hid_info(hdev,
			 "fixing up Topre REALFORCE keyboard report descriptor\n");
		rdesc[72] = 0x02;
	} else if (*rsize >= 106 && rdesc[28] == 0x29 && rdesc[29] == 0xe7 &&
		   rdesc[30] == 0x81 && rdesc[31] == 0x00) {
		hid_info(hdev,
			 "fixing up Topre REALFORCE keyboard report descriptor\n");
		rdesc[31] = 0x02;
	}
	return rdesc;
}

static const struct hid_device_id topre_id_table[] = {
	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
			 USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
			 USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
			 USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87) },
	{ }
};
MODULE_DEVICE_TABLE(hid, topre_id_table);

static struct hid_driver topre_driver = {
	.name = "topre",
	.id_table = topre_id_table,
	.report_fixup = topre_report_fixup,
};

module_hid_driver(topre_driver);
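
For reference, the byte pairs the fixup matches decode as standard HID short items, and the patch flips a single flag bit. An annotated view of the matched bytes, per the HID item encoding:

/*
 * HID report-descriptor items matched by topre_report_fixup():
 *
 *   0x29, 0xe7   Usage Maximum (0xe7 = Keyboard Right GUI, the last key usage)
 *   0x81, 0x00   Input (Data, Array, Absolute)    - what the descriptor claims
 *   0x81, 0x02   Input (Data, Variable, Absolute) - what the fixup writes
 *
 * Bit 1 of the Input item's data byte selects Variable over Array, so
 * rewriting 0x00 to 0x02 turns the declared array report into the
 * variable bitmap report the keyboard actually sends.
 */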
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/fs/hpfs/hpfs_fn.h
 *
 *  Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999
 *
 *  function headers
 */

//#define DBG
//#define DEBUG_LOCKS

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/blkdev.h>
#include <linux/unaligned.h>

#include "hpfs.h"

#define EIOERROR  EIO
#define EFSERROR  EUCLEAN

#define ANODE_ALLOC_FWD	512
#define FNODE_ALLOC_FWD	0
#define ALLOC_FWD_MIN	16
#define ALLOC_FWD_MAX	128
#define ALLOC_M		1
#define FNODE_RD_AHEAD	16
#define ANODE_RD_AHEAD	0
#define DNODE_RD_AHEAD	72
#define COUNT_RD_AHEAD	62

#define FREE_DNODES_ADD	58
#define FREE_DNODES_DEL	29

#define CHKCOND(x,y) if (!(x)) printk y

struct hpfs_inode_info {
	loff_t mmu_private;
	ino_t i_parent_dir;	/* (directories) gives fnode of parent dir */
	unsigned i_dno;		/* (directories) root dnode */
	unsigned i_dpos;	/* (directories) temp for readdir */
	unsigned i_dsubdno;	/* (directories) temp for readdir */
	unsigned i_file_sec;	/* (files) minimalist cache of alloc info */
	unsigned i_disk_sec;	/* (files) minimalist cache of alloc info */
	unsigned i_n_secs;	/* (files) minimalist cache of alloc info */
	unsigned i_ea_size;	/* size of extended attributes */
	unsigned i_ea_mode : 1;	/* file's permission is stored in ea */
	unsigned i_ea_uid : 1;	/* file's uid is stored in ea */
	unsigned i_ea_gid : 1;	/* file's gid is stored in ea */
	unsigned i_dirty : 1;
	loff_t **i_rddir_off;
	struct inode vfs_inode;
};

struct hpfs_sb_info {
	struct mutex hpfs_mutex;	/* global hpfs lock */
	ino_t sb_root;			/* inode number of root dir */
	unsigned sb_fs_size;		/* file system size, sectors */
	unsigned sb_bitmaps;		/* sector number of bitmap list */
	unsigned sb_dirband_start;	/* directory band start sector */
	unsigned sb_dirband_size;	/* directory band size, dnodes */
	unsigned sb_dmap;		/* sector number of dnode bit map */
	unsigned sb_n_free;		/* free blocks for statfs, or -1 */
	unsigned sb_n_free_dnodes;	/* free dnodes for statfs, or -1 */
	kuid_t sb_uid;			/* uid from mount options */
	kgid_t sb_gid;			/* gid from mount options */
	umode_t sb_mode;		/* mode from mount options */
	unsigned sb_eas : 2;		/* eas: 0-ignore, 1-ro, 2-rw */
	unsigned sb_err : 2;		/* on errs: 0-cont, 1-ro, 2-panic */
	unsigned sb_chk : 2;		/* checks: 0-no, 1-normal, 2-strict */
	unsigned sb_lowercase : 1;	/* downcase filenames hackery */
	unsigned sb_was_error : 1;	/* there was an error, set dirty flag */
	unsigned sb_chkdsk : 2;		/* chkdsk: 0-no, 1-on errs, 2-always */
	unsigned char *sb_cp_table;	/* code page tables: */
					/*	128 bytes uppercasing table & */
					/*	128 bytes lowercasing table */
	__le32 *sb_bmp_dir;		/* main bitmap directory */
	unsigned sb_c_bitmap;		/* current bitmap */
	unsigned sb_max_fwd_alloc;	/* max forward allocation */
	int sb_timeshift;
	struct rcu_head rcu;

	unsigned n_hotfixes;
	secno hotfix_from[256];
	secno hotfix_to[256];
};

/* Four 512-byte buffers and the 2k block obtained by concatenating them */

struct quad_buffer_head {
	struct buffer_head *bh[4];
	void *data;
};

/* The b-tree down pointer from a dir entry */

static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
{
	CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
	return le32_to_cpu(*(__le32 *) ((void *) de + le16_to_cpu(de->length) - 4));
}

/* The first dir entry in a dnode */

static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
{
	return (void *) dnode->dirent;
}

/* The end+1 of the dir entries */

static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
{
	CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
	return (void *) dnode + le32_to_cpu(dnode->first_free);
}

/* The dir entry after dir entry de */

static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
{
	CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
	return (void *) de + le16_to_cpu(de->length);
}

static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
{
	return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
}

static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
{
	return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
}

static unsigned ea_valuelen(struct extended_attribute *ea)
{
	return ea->valuelen_lo + 256 * ea->valuelen_hi;
}

static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
{
	return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
}

static inline secno ea_sec(struct extended_attribute *ea)
{
	return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 9 + ea->namelen)));
}

static inline secno ea_len(struct extended_attribute *ea)
{
	return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 5 + ea->namelen)));
}

static inline char *ea_data(struct extended_attribute *ea)
{
	return (char *)((char *)ea + 5 + ea->namelen);
}

static inline unsigned de_size(int namelen, secno down_ptr)
{
	return ((0x1f + namelen + 3) & ~3) + (down_ptr ?
4 : 0); } static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) { int a; int n; if (!dst || !src) return; a = dst->down; n = dst->not_8x3; memcpy((char *)dst + 2, (char *)src + 2, 28); dst->down = a; dst->not_8x3 = n; } static inline unsigned tstbits(__le32 *bmp, unsigned b, unsigned n) { int i; if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1; for (i = 1; i < n; i++) if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1)) return i + 1; return 0; } /* alloc.c */ int hpfs_chk_sectors(struct super_block *, secno, int, char *); secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int); int hpfs_alloc_if_possible(struct super_block *, secno); void hpfs_free_sectors(struct super_block *, secno, unsigned); int hpfs_check_free_dnodes(struct super_block *, int); void hpfs_free_dnode(struct super_block *, secno); struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *); /* anode.c */ secno hpfs_bplus_lookup(struct super_block *, struct inode *, struct bplus_header *, unsigned, struct buffer_head *); secno hpfs_add_sector_to_btree(struct super_block *, secno, int, unsigned); void hpfs_remove_btree(struct super_block *, struct bplus_header *); int hpfs_ea_read(struct super_block *, secno, int, unsigned, unsigned, char *); int hpfs_ea_write(struct super_block *, secno, int, unsigned, unsigned, const char *); void hpfs_ea_remove(struct super_block *, secno, int, unsigned); void hpfs_truncate_btree(struct super_block *, secno, int, unsigned); void hpfs_remove_fnode(struct super_block *, fnode_secno fno); /* buffer.c */ secno hpfs_search_hotfix_map(struct super_block *s, secno sec); unsigned hpfs_search_hotfix_map_for_range(struct super_block *s, secno sec, unsigned n); void hpfs_prefetch_sectors(struct super_block *, unsigned, int); void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); void *hpfs_get_4sectors(struct super_block *, unsigned, struct quad_buffer_head *); void hpfs_brelse4(struct quad_buffer_head *); void hpfs_mark_4buffers_dirty(struct quad_buffer_head *); /* dentry.c */ extern const struct dentry_operations hpfs_dentry_operations; /* dir.c */ struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int); extern const struct file_operations hpfs_dir_ops; /* dnode.c */ int hpfs_add_pos(struct inode *, loff_t *); void hpfs_del_pos(struct inode *, loff_t *); struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, const unsigned char *, unsigned, secno); int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned, struct hpfs_dirent *); int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int); void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *); dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno); struct hpfs_dirent *map_pos_dirent(struct inode *, loff_t *, struct quad_buffer_head *); struct hpfs_dirent *map_dirent(struct inode *, 
dnode_secno, const unsigned char *, unsigned, dnode_secno *, struct quad_buffer_head *); void hpfs_remove_dtree(struct super_block *, dnode_secno); struct hpfs_dirent *map_fnode_dirent(struct super_block *, fnode_secno, struct fnode *, struct quad_buffer_head *); /* ea.c */ void hpfs_ea_ext_remove(struct super_block *, secno, int, unsigned); int hpfs_read_ea(struct super_block *, struct fnode *, char *, char *, int); char *hpfs_get_ea(struct super_block *, struct fnode *, char *, int *); void hpfs_set_ea(struct inode *, struct fnode *, const char *, const char *, int); /* file.c */ int hpfs_file_fsync(struct file *, loff_t, loff_t, int); void hpfs_truncate(struct inode *); extern const struct file_operations hpfs_file_ops; extern const struct inode_operations hpfs_file_iops; extern const struct address_space_operations hpfs_aops; /* inode.c */ void hpfs_init_inode(struct inode *); void hpfs_read_inode(struct inode *); void hpfs_write_inode(struct inode *); void hpfs_write_inode_nolock(struct inode *); int hpfs_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); void hpfs_evict_inode(struct inode *); /* map.c */ __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); void hpfs_prefetch_bitmap(struct super_block *, unsigned); unsigned char *hpfs_load_code_page(struct super_block *, secno); __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); void hpfs_load_hotfix_map(struct super_block *s, struct hpfs_spare_block *spareblock); struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino); /* name.c */ unsigned char hpfs_upcase(unsigned char *, unsigned char); int hpfs_chk_name(const unsigned char *, unsigned *); unsigned char *hpfs_translate_name(struct super_block *, unsigned char *, unsigned, int, int); int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned, const unsigned char *, unsigned, int); int hpfs_is_name_long(const unsigned char *, unsigned); void hpfs_adjust_length(const unsigned char *, unsigned *); /* namei.c */ extern const struct inode_operations hpfs_dir_iops; extern const struct address_space_operations hpfs_symlink_aops; static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) { return container_of(inode, struct hpfs_inode_info, vfs_inode); } static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) { return sb->s_fs_info; } /* super.c */ __printf(2, 3) void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_get_free_dnodes(struct super_block *); long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); /* * local time (HPFS) to GMT (Unix) */ static inline time64_t local_to_gmt(struct super_block *s, time64_t t) { extern struct timezone sys_tz; return t + sys_tz.tz_minuteswest * 60 + hpfs_sb(s)->sb_timeshift; } static inline time32_t gmt_to_local(struct super_block *s, time64_t t) { extern struct timezone sys_tz; return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; } static inline time32_t local_get_seconds(struct super_block *s) { return gmt_to_local(s, ktime_get_real_seconds()); } 
/* * Locking: * * hpfs_lock() locks the whole filesystem. It must be taken * on any method called by the VFS. * * We don't do any per-file locking anymore, it is hard to * review and HPFS is not performance-sensitive anyway. */ static inline void hpfs_lock(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); mutex_lock(&sbi->hpfs_mutex); } static inline void hpfs_unlock(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); mutex_unlock(&sbi->hpfs_mutex); } static inline void hpfs_lock_assert(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex)); }
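
Given the locking comment above, every VFS entry point in the other hpfs files follows the same bracketing pattern. A minimal sketch of the convention (hpfs_example_op is a hypothetical function, for illustration only):

/* Hypothetical example of the locking convention described above:
 * take the single filesystem mutex on entry to a VFS method and drop
 * it on every exit path.
 */
static int hpfs_example_op(struct inode *inode)
{
	int err;

	hpfs_lock(inode->i_sb);
	err = 0;			/* ... real work would go here ... */
	hpfs_unlock(inode->i_sb);
	return err;
}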
// SPDX-License-Identifier: GPL-2.0-only
/* Page fragment allocator
 *
 * Page Fragment:
 *  An arbitrary-length arbitrary-offset area of memory which resides within a
 *  0 or higher order page.  Multiple fragments within that page are
 *  individually refcounted, in the page's reference counter.
 *
 * The page_frag functions provide a simple allocation framework for page
 * fragments.  This is used by the network stack and network device drivers to
 * provide a backing region of memory for use as either an sk_buff->head, or to
 * be used in the "frags" portion of skb_shared_info.
 */

#include <linux/build_bug.h>
#include <linux/export.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/page_frag_cache.h>
#include "internal.h"

static unsigned long encoded_page_create(struct page *page, unsigned int order,
					 bool pfmemalloc)
{
	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);

	return (unsigned long)page_address(page) |
		(order & PAGE_FRAG_CACHE_ORDER_MASK) |
		((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static unsigned long encoded_page_decode_order(unsigned long encoded_page)
{
	return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
}

static void *encoded_page_decode_virt(unsigned long encoded_page)
{
	return (void *)(encoded_page & PAGE_MASK);
}

static struct page *encoded_page_decode_page(unsigned long encoded_page)
{
	return virt_to_page((void *)encoded_page);
}

/* Recover the pfmemalloc flag packed in by encoded_page_create();
 * used by __page_frag_alloc_align() below.
 */
static bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
{
	return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
					     gfp_t gfp_mask)
{
	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
	struct page *page = NULL;
	gfp_t gfp = gfp_mask;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
	page = __alloc_pages(gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER,
			     numa_mem_id(), NULL);
#endif
	if (unlikely(!page)) {
		page = __alloc_pages(gfp, 0, numa_mem_id(), NULL);
		order = 0;
	}

	nc->encoded_page = page ?
encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0; return page; } void page_frag_cache_drain(struct page_frag_cache *nc) { if (!nc->encoded_page) return; __page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page), nc->pagecnt_bias); nc->encoded_page = 0; } EXPORT_SYMBOL(page_frag_cache_drain); void __page_frag_cache_drain(struct page *page, unsigned int count) { VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); if (page_ref_sub_and_test(page, count)) free_frozen_pages(page, compound_order(page)); } EXPORT_SYMBOL(__page_frag_cache_drain); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask) { unsigned long encoded_page = nc->encoded_page; unsigned int size, offset; struct page *page; if (unlikely(!encoded_page)) { refill: page = __page_frag_cache_refill(nc, gfp_mask); if (!page) return NULL; encoded_page = nc->encoded_page; /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE); /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; nc->offset = 0; } size = PAGE_SIZE << encoded_page_decode_order(encoded_page); offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask); if (unlikely(offset + fragsz > size)) { if (unlikely(fragsz > PAGE_SIZE)) { /* * The caller is trying to allocate a fragment * with fragsz > PAGE_SIZE but the cache isn't big * enough to satisfy the request, this may * happen in low memory conditions. * We don't release the cache page because * it could make memory pressure worse * so we simply return NULL here. */ return NULL; } page = encoded_page_decode_page(encoded_page); if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) { free_frozen_pages(page, encoded_page_decode_order(encoded_page)); goto refill; } /* OK, page count is 0, we can safely set it */ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1); /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = 0; } nc->pagecnt_bias--; nc->offset = offset + fragsz; return encoded_page_decode_virt(encoded_page) + offset; } EXPORT_SYMBOL(__page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. */ void page_frag_free(void *addr) { struct page *page = virt_to_head_page(addr); if (unlikely(put_page_testzero(page))) free_frozen_pages(page, compound_order(page)); } EXPORT_SYMBOL(page_frag_free);
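
A sketch of the caller side, for context: the cache hands out sub-page fragments that each pin the backing page, and page_frag_free() drops that pin. Note the align_mask convention of __page_frag_alloc_align(): for a power-of-two alignment A the caller passes -A, i.e. ~(A - 1). The helper name example_grab_frag below is hypothetical, not part of this file:

/* Illustrative caller pattern (sketch): carve cacheline-aligned
 * fragments out of a page_frag cache; each fragment holds a reference
 * on its backing page until page_frag_free() releases it.
 */
static void *example_grab_frag(struct page_frag_cache *nc, unsigned int len)
{
	/* -SMP_CACHE_BYTES == ~(SMP_CACHE_BYTES - 1), the align_mask
	 * form expected by __page_frag_alloc_align()
	 */
	return __page_frag_alloc_align(nc, len, GFP_ATOMIC, -SMP_CACHE_BYTES);
}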
// SPDX-License-Identifier: GPL-2.0+
/*
 * usbduxsigma.c
 * Copyright (C) 2011-2015
Bernd Porr, mail@berndporr.me.uk */ /* * Driver: usbduxsigma * Description: University of Stirling USB DAQ & INCITE Technology Limited * Devices: [ITL] USB-DUX-SIGMA (usbduxsigma) * Author: Bernd Porr <mail@berndporr.me.uk> * Updated: 20 July 2015 * Status: stable */ /* * I must give credit here to Chris Baugher who * wrote the driver for AT-MIO-16d. I used some parts of this * driver. I also must give credits to David Brownell * who supported me with the USB development. * * Note: the raw data from the A/D converter is 24 bit big endian * anything else is little endian to/from the dux board * * * Revision history: * 0.1: initial version * 0.2: all basic functions implemented, digital I/O only for one port * 0.3: proper vendor ID and driver name * 0.4: fixed D/A voltage range * 0.5: various bug fixes, health check at startup * 0.6: corrected wrong input range * 0.7: rewrite code that urb->interval is always 1 */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/fcntl.h> #include <linux/compiler.h> #include <linux/unaligned.h> #include <linux/comedi/comedi_usb.h> /* timeout for the USB-transfer in ms*/ #define BULK_TIMEOUT 1000 /* constants for "firmware" upload and download */ #define FIRMWARE "usbduxsigma_firmware.bin" #define FIRMWARE_MAX_LEN 0x4000 #define USBDUXSUB_FIRMWARE 0xa0 #define VENDOR_DIR_IN 0xc0 #define VENDOR_DIR_OUT 0x40 /* internal addresses of the 8051 processor */ #define USBDUXSUB_CPUCS 0xE600 /* 300Hz max frequ under PWM */ #define MIN_PWM_PERIOD ((long)(1E9 / 300)) /* Default PWM frequency */ #define PWM_DEFAULT_PERIOD ((long)(1E9 / 100)) /* Number of channels (16 AD and offset)*/ #define NUMCHANNELS 16 /* Size of one A/D value */ #define SIZEADIN ((sizeof(u32))) /* * Size of the async input-buffer IN BYTES, the DIO state is transmitted * as the first byte. */ #define SIZEINBUF (((NUMCHANNELS + 1) * SIZEADIN)) /* 16 bytes. */ #define SIZEINSNBUF 16 /* Number of DA channels */ #define NUMOUTCHANNELS 8 /* size of one value for the D/A converter: channel and value */ #define SIZEDAOUT ((sizeof(u8) + sizeof(uint16_t))) /* * Size of the output-buffer in bytes * Actually only the first 4 triplets are used but for the * high speed mode we need to pad it to 8 (microframes). */ #define SIZEOUTBUF ((8 * SIZEDAOUT)) /* * Size of the buffer for the dux commands: just now max size is determined * by the analogue out + command byte + panic bytes... 
*/ #define SIZEOFDUXBUFFER ((8 * SIZEDAOUT + 2)) /* Number of in-URBs which receive the data: min=2 */ #define NUMOFINBUFFERSFULL 5 /* Number of out-URBs which send the data: min=2 */ #define NUMOFOUTBUFFERSFULL 5 /* Number of in-URBs which receive the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFINBUFFERSHIGH 10 /* Number of out-URBs which send the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFOUTBUFFERSHIGH 10 /* number of retries to get the right dux command */ #define RETRIES 10 /* bulk transfer commands to usbduxsigma */ #define USBBUXSIGMA_AD_CMD 9 #define USBDUXSIGMA_DA_CMD 1 #define USBDUXSIGMA_DIO_CFG_CMD 2 #define USBDUXSIGMA_DIO_BITS_CMD 3 #define USBDUXSIGMA_SINGLE_AD_CMD 4 #define USBDUXSIGMA_PWM_ON_CMD 7 #define USBDUXSIGMA_PWM_OFF_CMD 8 static const struct comedi_lrange usbduxsigma_ai_range = { 1, { BIP_RANGE(2.5 * 0x800000 / 0x780000 / 2.0) } }; struct usbduxsigma_private { /* actual number of in-buffers */ int n_ai_urbs; /* actual number of out-buffers */ int n_ao_urbs; /* ISO-transfer handling: buffers */ struct urb **ai_urbs; struct urb **ao_urbs; /* pwm-transfer handling */ struct urb *pwm_urb; /* PWM period */ unsigned int pwm_period; /* PWM internal delay for the GPIF in the FX2 */ u8 pwm_delay; /* size of the PWM buffer which holds the bit pattern */ int pwm_buf_sz; /* input buffer for the ISO-transfer */ __be32 *in_buf; /* input buffer for single insn */ u8 *insn_buf; unsigned high_speed:1; unsigned ai_cmd_running:1; unsigned ao_cmd_running:1; unsigned pwm_cmd_running:1; /* time between samples in units of the timer */ unsigned int ai_timer; unsigned int ao_timer; /* counter between acquisitions */ unsigned int ai_counter; unsigned int ao_counter; /* interval in frames/uframes */ unsigned int ai_interval; /* commands */ u8 *dux_commands; struct mutex mut; }; static void usbduxsigma_unlink_urbs(struct urb **urbs, int num_urbs) { int i; for (i = 0; i < num_urbs; i++) usb_kill_urb(urbs[i]); } static void usbduxsigma_ai_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink && devpriv->ai_urbs) usbduxsigma_unlink_urbs(devpriv->ai_urbs, devpriv->n_ai_urbs); devpriv->ai_cmd_running = 0; } static int usbduxsigma_ai_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbduxsigma_ai_stop(dev, devpriv->ai_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsigma_ai_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; u32 val; int ret; int i; if ((urb->actual_length > 0) && (urb->status != -EXDEV)) { devpriv->ai_counter--; if (devpriv->ai_counter == 0) { devpriv->ai_counter = devpriv->ai_timer; /* * Get the data from the USB bus and hand it over * to comedi. Note, first byte is the DIO state. 
*/ for (i = 0; i < cmd->chanlist_len; i++) { val = be32_to_cpu(devpriv->in_buf[i + 1]); val &= 0x00ffffff; /* strip status byte */ val = comedi_offset_munge(s, val); if (!comedi_buf_write_samples(s, &val, 1)) return; } if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) async->events |= COMEDI_CB_EOA; } } /* if command is still running, resubmit urb */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->dev = comedi_to_usb_dev(dev); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsigma_ai_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; struct comedi_async *async = s->async; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ai_cmd_running) return; switch (urb->status) { case 0: /* copy the result in the transfer buffer */ memcpy(devpriv->in_buf, urb->transfer_buffer, SIZEINBUF); usbduxsigma_ai_handle_urb(dev, s, urb); break; case -EILSEQ: /* * error in the ISOchronous data * we don't copy the data into the transfer buffer * and recycle the last data byte */ dev_dbg(dev->class_dev, "CRC error in ISO IN stream\n"); usbduxsigma_ai_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. 
*/ if (async->events & COMEDI_CB_CANCEL_MASK) usbduxsigma_ai_stop(dev, 0); comedi_event(dev, s); } static void usbduxsigma_ao_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink && devpriv->ao_urbs) usbduxsigma_unlink_urbs(devpriv->ao_urbs, devpriv->n_ao_urbs); devpriv->ao_cmd_running = 0; } static int usbduxsigma_ao_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbduxsigma_ao_stop(dev, devpriv->ao_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsigma_ao_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; u8 *datap; int ret; int i; devpriv->ao_counter--; if (devpriv->ao_counter == 0) { devpriv->ao_counter = devpriv->ao_timer; if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) { async->events |= COMEDI_CB_EOA; return; } /* transmit data to the USB bus */ datap = urb->transfer_buffer; *datap++ = cmd->chanlist_len; for (i = 0; i < cmd->chanlist_len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); unsigned short val; if (!comedi_buf_read_samples(s, &val, 1)) { dev_err(dev->class_dev, "buffer underflow\n"); async->events |= COMEDI_CB_OVERFLOW; return; } *datap++ = val; *datap++ = chan; s->readback[chan] = val; } } /* if command is still running, resubmit urb */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->transfer_buffer_length = SIZEOUTBUF; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; urb->interval = 1; /* (u)frames */ urb->number_of_packets = 1; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; urb->iso_frame_desc[0].status = 0; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsigma_ao_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; struct comedi_subdevice *s = dev->write_subdev; struct comedi_async *async = s->async; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ao_cmd_running) return; switch (urb->status) { case 0: usbduxsigma_ao_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. */ if (async->events & COMEDI_CB_CANCEL_MASK) usbduxsigma_ao_stop(dev, 0); comedi_event(dev, s); } static int usbduxsigma_submit_urbs(struct comedi_device *dev, struct urb **urbs, int num_urbs, int input_urb) { struct usb_device *usb = comedi_to_usb_dev(dev); struct urb *urb; int ret; int i; /* Submit all URBs and start the transfer on the bus */ for (i = 0; i < num_urbs; i++) { urb = urbs[i]; /* in case of a resubmission after an unlink... 
*/ if (input_urb) urb->interval = 1; urb->context = dev; urb->dev = usb; urb->status = 0; urb->transfer_flags = URB_ISO_ASAP; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) return ret; } return 0; } static int usbduxsigma_chans_to_interval(int num_chan) { if (num_chan <= 2) return 2; /* 4kHz */ if (num_chan <= 8) return 4; /* 2kHz */ return 8; /* 1kHz */ } static int usbduxsigma_ai_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { struct usbduxsigma_private *devpriv = dev->private; int high_speed = devpriv->high_speed; int interval = usbduxsigma_chans_to_interval(cmd->chanlist_len); unsigned int tmp; int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); if (high_speed) { /* * In high speed mode microframes are possible. * However, during one microframe we can roughly * sample two channels. Thus, the more channels * are in the channel list the more time we need. */ err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, (125000 * interval)); } else { /* full speed */ /* 1kHz scans every USB frame */ err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, 1000000); } err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; /* Step 4: fix up any arguments */ tmp = rounddown(cmd->scan_begin_arg, high_speed ? 125000 : 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); if (err) return 4; return 0; } /* * creates the ADC command for the MAX1271 * chan is the comedi channel number; it selects the corresponding * bit in the muxsg0/muxsg1 channel-select bytes */ static void create_adc_command(unsigned int chan, u8 *muxsg0, u8 *muxsg1) { if (chan < 8) (*muxsg0) = (*muxsg0) | (1 << chan); else if (chan < 16) (*muxsg1) = (*muxsg1) | (1 << (chan - 8)); } static int usbbuxsigma_send_cmd(struct comedi_device *dev, int cmd_type) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; int nsent; devpriv->dux_commands[0] = cmd_type; return usb_bulk_msg(usb, usb_sndbulkpipe(usb, 1), devpriv->dux_commands, SIZEOFDUXBUFFER, &nsent, BULK_TIMEOUT); } static int usbduxsigma_receive_cmd(struct comedi_device *dev, int command) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; int nrec; int ret; int i; for (i = 0; i < RETRIES; i++) { ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, 8), devpriv->insn_buf, SIZEINSNBUF, &nrec, BULK_TIMEOUT); if (ret < 0) return ret; if (devpriv->insn_buf[0] == command) return 0; } /* * This is only reached if the data has been requested a * couple of times and the command was not received.
*/ return -EFAULT; } static int usbduxsigma_ai_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ai_cmd_running) { devpriv->ai_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } mutex_unlock(&devpriv->mut); return 1; } static int usbduxsigma_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned int len = cmd->chanlist_len; u8 muxsg0 = 0; u8 muxsg1 = 0; u8 sysred = 0; int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->high_speed) { /* * every 2 channels get a time window of 125us. Thus, if we * sample all 16 channels we need 1ms. If we sample only one * channel we need only 125us */ unsigned int interval = usbduxsigma_chans_to_interval(len); devpriv->ai_interval = interval; devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); } else { /* interval always 1ms */ devpriv->ai_interval = 1; devpriv->ai_timer = cmd->scan_begin_arg / 1000000; } for (i = 0; i < len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); create_adc_command(chan, &muxsg0, &muxsg1); } devpriv->dux_commands[1] = devpriv->ai_interval; devpriv->dux_commands[2] = len; /* num channels per time step */ devpriv->dux_commands[3] = 0x12; /* CONFIG0 */ devpriv->dux_commands[4] = 0x03; /* CONFIG1: 23kHz sample, delay 0us */ devpriv->dux_commands[5] = 0x00; /* CONFIG3: diff. channels off */ devpriv->dux_commands[6] = muxsg0; devpriv->dux_commands[7] = muxsg1; devpriv->dux_commands[8] = sysred; ret = usbbuxsigma_send_cmd(dev, USBBUXSIGMA_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } devpriv->ai_counter = devpriv->ai_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ai_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } else { /* TRIG_INT */ s->async->inttrig = usbduxsigma_ai_inttrig; } mutex_unlock(&devpriv->mut); return 0; } static int usbduxsigma_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); u8 muxsg0 = 0; u8 muxsg1 = 0; u8 sysred = 0; int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->ai_cmd_running) { mutex_unlock(&devpriv->mut); return -EBUSY; } create_adc_command(chan, &muxsg0, &muxsg1); /* Mode 0 is used to get a single conversion on demand */ devpriv->dux_commands[1] = 0x16; /* CONFIG0: chopper on */ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. 
channels off */ devpriv->dux_commands[4] = muxsg0; devpriv->dux_commands[5] = muxsg1; devpriv->dux_commands[6] = sysred; /* adc commands */ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } for (i = 0; i < insn->n; i++) { u32 val; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } /* 32 bits big endian from the A/D converter */ val = be32_to_cpu(get_unaligned((__be32 *)(devpriv->insn_buf + 1))); val &= 0x00ffffff; /* strip status byte */ data[i] = comedi_offset_munge(s, val); } mutex_unlock(&devpriv->mut); return insn->n; } static int usbduxsigma_ao_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); ret = comedi_readback_insn_read(dev, s, insn, data); mutex_unlock(&devpriv->mut); return ret; } static int usbduxsigma_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->ao_cmd_running) { mutex_unlock(&devpriv->mut); return -EBUSY; } for (i = 0; i < insn->n; i++) { devpriv->dux_commands[1] = 1; /* num channels */ devpriv->dux_commands[2] = data[i]; /* value */ devpriv->dux_commands[3] = chan; /* channel number */ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DA_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } s->readback[chan] = data[i]; } mutex_unlock(&devpriv->mut); return insn->n; } static int usbduxsigma_ao_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ao_cmd_running) { devpriv->ao_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } mutex_unlock(&devpriv->mut); return 1; } static int usbduxsigma_ao_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { unsigned int tmp; int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); /* * For now, always use "scan" timing with all channels updated at once * (cmd->scan_begin_src == TRIG_TIMER, cmd->convert_src == TRIG_NOW). * * In a future version, "convert" timing with channels updated * individually may be supported in high speed mode * (cmd->scan_begin_src == TRIG_FOLLOW, cmd->convert_src == TRIG_TIMER). */ err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; /* Step 4: fix up any arguments */ tmp = rounddown(cmd->scan_begin_arg, 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); if (err) return 4; return 0; } static int usbduxsigma_ao_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; mutex_lock(&devpriv->mut); /* * For now, only "scan" timing is supported. A future version may * support "convert" timing in high speed mode. * * Timing of the scan: every 1ms all channels updated at once. */ devpriv->ao_timer = cmd->scan_begin_arg / 1000000; devpriv->ao_counter = devpriv->ao_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ao_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } else { /* TRIG_INT */ s->async->inttrig = usbduxsigma_ao_inttrig; } mutex_unlock(&devpriv->mut); return 0; } static int usbduxsigma_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; /* * We don't tell the firmware here as it would take 8 frames * to submit the information. We do it in the (*insn_bits). */ return insn->n; } static int usbduxsigma_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); comedi_dio_update_state(s, data); /* Always update the hardware. See the (*insn_config).
*/ devpriv->dux_commands[1] = s->io_bits & 0xff; devpriv->dux_commands[4] = s->state & 0xff; devpriv->dux_commands[2] = (s->io_bits >> 8) & 0xff; devpriv->dux_commands[5] = (s->state >> 8) & 0xff; devpriv->dux_commands[3] = (s->io_bits >> 16) & 0xff; devpriv->dux_commands[6] = (s->state >> 16) & 0xff; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD); if (ret < 0) goto done; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD); if (ret < 0) goto done; s->state = devpriv->insn_buf[1] | (devpriv->insn_buf[2] << 8) | (devpriv->insn_buf[3] << 16); data[1] = s->state; ret = insn->n; done: mutex_unlock(&devpriv->mut); return ret; } static void usbduxsigma_pwm_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink) { if (devpriv->pwm_urb) usb_kill_urb(devpriv->pwm_urb); } devpriv->pwm_cmd_running = 0; } static int usbduxsigma_pwm_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; /* unlink only if it is really running */ usbduxsigma_pwm_stop(dev, devpriv->pwm_cmd_running); return usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_OFF_CMD); } static void usbduxsigma_pwm_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; int ret; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ if (devpriv->pwm_cmd_running) usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ return; default: /* a real error */ if (devpriv->pwm_cmd_running) { dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ } return; } if (!devpriv->pwm_cmd_running) return; urb->transfer_buffer_length = devpriv->pwm_buf_sz; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ } } static int usbduxsigma_submit_pwm_urb(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; struct urb *urb = devpriv->pwm_urb; /* in case of a resubmission after an unlink... 
*/ usb_fill_bulk_urb(urb, usb, usb_sndbulkpipe(usb, 4), urb->transfer_buffer, devpriv->pwm_buf_sz, usbduxsigma_pwm_urb_complete, dev); return usb_submit_urb(urb, GFP_ATOMIC); } static int usbduxsigma_pwm_period(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int period) { struct usbduxsigma_private *devpriv = dev->private; int fx2delay; if (period < MIN_PWM_PERIOD) return -EAGAIN; fx2delay = (period / (6 * 512 * 1000 / 33)) - 6; if (fx2delay > 255) return -EAGAIN; devpriv->pwm_delay = fx2delay; devpriv->pwm_period = period; return 0; } static int usbduxsigma_pwm_start(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; int ret; if (devpriv->pwm_cmd_running) return 0; devpriv->dux_commands[1] = devpriv->pwm_delay; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_ON_CMD); if (ret < 0) return ret; memset(devpriv->pwm_urb->transfer_buffer, 0, devpriv->pwm_buf_sz); devpriv->pwm_cmd_running = 1; ret = usbduxsigma_submit_pwm_urb(dev); if (ret < 0) { devpriv->pwm_cmd_running = 0; return ret; } return 0; } static void usbduxsigma_pwm_pattern(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int chan, unsigned int value, unsigned int sign) { struct usbduxsigma_private *devpriv = dev->private; char pwm_mask = (1 << chan); /* DIO bit for the PWM data */ char sgn_mask = (16 << chan); /* DIO bit for the sign */ char *buf = (char *)(devpriv->pwm_urb->transfer_buffer); int szbuf = devpriv->pwm_buf_sz; int i; for (i = 0; i < szbuf; i++) { char c = *buf; c &= ~pwm_mask; if (i < value) c |= pwm_mask; if (!sign) c &= ~sgn_mask; else c |= sgn_mask; *buf++ = c; } } static int usbduxsigma_pwm_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); /* * It doesn't make sense to support more than one value here * because it would just overwrite the PWM buffer. */ if (insn->n != 1) return -EINVAL; /* * The sign is set via a special INSN only, this gives us 8 bits * for normal operation, sign is 0 by default. 
*/ usbduxsigma_pwm_pattern(dev, s, chan, data[0], 0); return insn->n; } static int usbduxsigma_pwm_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); switch (data[0]) { case INSN_CONFIG_ARM: /* * if not zero the PWM is limited to a certain time which is * not supported here */ if (data[1] != 0) return -EINVAL; return usbduxsigma_pwm_start(dev, s); case INSN_CONFIG_DISARM: return usbduxsigma_pwm_cancel(dev, s); case INSN_CONFIG_GET_PWM_STATUS: data[1] = devpriv->pwm_cmd_running; return 0; case INSN_CONFIG_PWM_SET_PERIOD: return usbduxsigma_pwm_period(dev, s, data[1]); case INSN_CONFIG_PWM_GET_PERIOD: data[1] = devpriv->pwm_period; return 0; case INSN_CONFIG_PWM_SET_H_BRIDGE: /* * data[1] = value * data[2] = sign (for a relay) */ usbduxsigma_pwm_pattern(dev, s, chan, data[1], (data[2] != 0)); return 0; case INSN_CONFIG_PWM_GET_H_BRIDGE: /* values are not kept in this driver, nothing to return */ return -EINVAL; } return -EINVAL; } static int usbduxsigma_getstatusinfo(struct comedi_device *dev, int chan) { struct comedi_subdevice *s = dev->read_subdev; struct usbduxsigma_private *devpriv = dev->private; u8 sysred; u32 val; int ret; switch (chan) { default: case 0: sysred = 0; /* ADC zero */ break; case 1: sysred = 1; /* ADC offset */ break; case 2: sysred = 4; /* VCC */ break; case 3: sysred = 8; /* temperature */ break; case 4: sysred = 16; /* gain */ break; case 5: sysred = 32; /* ref */ break; } devpriv->dux_commands[1] = 0x12; /* CONFIG0 */ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. channels off */ devpriv->dux_commands[4] = 0; devpriv->dux_commands[5] = 0; devpriv->dux_commands[6] = sysred; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) return ret; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) return ret; /* 32 bits big endian from the A/D converter */ val = be32_to_cpu(get_unaligned((__be32 *)(devpriv->insn_buf + 1))); val &= 0x00ffffff; /* strip status byte */ return (int)comedi_offset_munge(s, val); } static int usbduxsigma_firmware_upload(struct comedi_device *dev, const u8 *data, size_t size, unsigned long context) { struct usb_device *usb = comedi_to_usb_dev(dev); u8 *buf; u8 *tmp; int ret; if (!data) return 0; if (size > FIRMWARE_MAX_LEN) { dev_err(dev->class_dev, "firmware binary too large for FX2\n"); return -ENOMEM; } /* we generate a local buffer for the firmware */ buf = kmemdup(data, size, GFP_KERNEL); if (!buf) return -ENOMEM; /* we need a malloc'ed buffer for usb_control_msg() */ tmp = kmalloc(1, GFP_KERNEL); if (!tmp) { kfree(buf); return -ENOMEM; } /* stop the current firmware on the device */ *tmp = 1; /* 7f92 to one */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, USBDUXSUB_CPUCS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "can not stop firmware\n"); goto done; } /* upload the new firmware to the device */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, 0, 0x0000, buf, size, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "firmware upload failed\n"); goto done; } /* start the new firmware on the device */ *tmp = 0; /* 7f92 to zero */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, USBDUXSUB_CPUCS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret 
< 0) dev_err(dev->class_dev, "can not start firmware\n"); done: kfree(tmp); kfree(buf); return ret; } static int usbduxsigma_alloc_usb_buffers(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; struct urb *urb; int i; devpriv->dux_commands = kzalloc(SIZEOFDUXBUFFER, GFP_KERNEL); devpriv->in_buf = kzalloc(SIZEINBUF, GFP_KERNEL); devpriv->insn_buf = kzalloc(SIZEINSNBUF, GFP_KERNEL); devpriv->ai_urbs = kcalloc(devpriv->n_ai_urbs, sizeof(urb), GFP_KERNEL); devpriv->ao_urbs = kcalloc(devpriv->n_ao_urbs, sizeof(urb), GFP_KERNEL); if (!devpriv->dux_commands || !devpriv->in_buf || !devpriv->insn_buf || !devpriv->ai_urbs || !devpriv->ao_urbs) return -ENOMEM; for (i = 0; i < devpriv->n_ai_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ai_urbs[i] = urb; urb->dev = usb; /* will be filled later with a pointer to the comedi-device */ /* and ONLY then the urb should be submitted */ urb->context = NULL; urb->pipe = usb_rcvisocpipe(usb, 6); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEINBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsigma_ai_urb_complete; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEINBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEINBUF; } for (i = 0; i < devpriv->n_ao_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ao_urbs[i] = urb; urb->dev = usb; /* will be filled later with a pointer to the comedi-device */ /* and ONLY then the urb should be submitted */ urb->context = NULL; urb->pipe = usb_sndisocpipe(usb, 2); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEOUTBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsigma_ao_urb_complete; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEOUTBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; urb->interval = 1; /* (u)frames */ } if (devpriv->pwm_buf_sz) { urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->pwm_urb = urb; urb->transfer_buffer = kzalloc(devpriv->pwm_buf_sz, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; } return 0; } static void usbduxsigma_free_usb_buffers(struct comedi_device *dev) { struct usbduxsigma_private *devpriv = dev->private; struct urb *urb; int i; urb = devpriv->pwm_urb; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } if (devpriv->ao_urbs) { for (i = 0; i < devpriv->n_ao_urbs; i++) { urb = devpriv->ao_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ao_urbs); } if (devpriv->ai_urbs) { for (i = 0; i < devpriv->n_ai_urbs; i++) { urb = devpriv->ai_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ai_urbs); } kfree(devpriv->insn_buf); kfree(devpriv->in_buf); kfree(devpriv->dux_commands); } static int usbduxsigma_auto_attach(struct comedi_device *dev, unsigned long context_unused) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv; struct comedi_subdevice *s; int offset; int ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; mutex_init(&devpriv->mut); usb_set_intfdata(intf, devpriv); devpriv->high_speed = (usb->speed == USB_SPEED_HIGH); if (devpriv->high_speed) { 
devpriv->n_ai_urbs = NUMOFINBUFFERSHIGH; devpriv->n_ao_urbs = NUMOFOUTBUFFERSHIGH; devpriv->pwm_buf_sz = 512; } else { devpriv->n_ai_urbs = NUMOFINBUFFERSFULL; devpriv->n_ao_urbs = NUMOFOUTBUFFERSFULL; } ret = usbduxsigma_alloc_usb_buffers(dev); if (ret) return ret; /* setting to alternate setting 3: enabling iso ep and bulk ep. */ ret = usb_set_interface(usb, intf->altsetting->desc.bInterfaceNumber, 3); if (ret < 0) { dev_err(dev->class_dev, "could not set alternate setting 3 in high speed\n"); return ret; } ret = comedi_load_firmware(dev, &usb->dev, FIRMWARE, usbduxsigma_firmware_upload, 0); if (ret) return ret; ret = comedi_alloc_subdevices(dev, (devpriv->high_speed) ? 4 : 3); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; dev->read_subdev = s; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ | SDF_LSAMPL; s->n_chan = NUMCHANNELS; s->len_chanlist = NUMCHANNELS; s->maxdata = 0x00ffffff; s->range_table = &usbduxsigma_ai_range; s->insn_read = usbduxsigma_ai_insn_read; s->do_cmdtest = usbduxsigma_ai_cmdtest; s->do_cmd = usbduxsigma_ai_cmd; s->cancel = usbduxsigma_ai_cancel; /* Analog Output subdevice */ s = &dev->subdevices[1]; dev->write_subdev = s; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE | SDF_GROUND | SDF_CMD_WRITE; s->n_chan = 4; s->len_chanlist = s->n_chan; s->maxdata = 0x00ff; s->range_table = &range_unipolar2_5; s->insn_write = usbduxsigma_ao_insn_write; s->insn_read = usbduxsigma_ao_insn_read; s->do_cmdtest = usbduxsigma_ao_cmdtest; s->do_cmd = usbduxsigma_ao_cmd; s->cancel = usbduxsigma_ao_cancel; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; /* Digital I/O subdevice */ s = &dev->subdevices[2]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = usbduxsigma_dio_insn_bits; s->insn_config = usbduxsigma_dio_insn_config; if (devpriv->high_speed) { /* Timer / pwm subdevice */ s = &dev->subdevices[3]; s->type = COMEDI_SUBD_PWM; s->subdev_flags = SDF_WRITABLE | SDF_PWM_HBRIDGE; s->n_chan = 8; s->maxdata = devpriv->pwm_buf_sz; s->insn_write = usbduxsigma_pwm_write; s->insn_config = usbduxsigma_pwm_config; usbduxsigma_pwm_period(dev, s, PWM_DEFAULT_PERIOD); } offset = usbduxsigma_getstatusinfo(dev, 0); if (offset < 0) { dev_err(dev->class_dev, "Communication to USBDUXSIGMA failed! 
Check firmware and cabling.\n"); return offset; } dev_info(dev->class_dev, "ADC_zero = %x\n", offset); return 0; } static void usbduxsigma_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usbduxsigma_private *devpriv = dev->private; usb_set_intfdata(intf, NULL); if (!devpriv) return; mutex_lock(&devpriv->mut); /* force unlink all urbs */ usbduxsigma_ai_stop(dev, 1); usbduxsigma_ao_stop(dev, 1); usbduxsigma_pwm_stop(dev, 1); usbduxsigma_free_usb_buffers(dev); mutex_unlock(&devpriv->mut); mutex_destroy(&devpriv->mut); } static struct comedi_driver usbduxsigma_driver = { .driver_name = "usbduxsigma", .module = THIS_MODULE, .auto_attach = usbduxsigma_auto_attach, .detach = usbduxsigma_detach, }; static int usbduxsigma_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &usbduxsigma_driver, 0); } static const struct usb_device_id usbduxsigma_usb_table[] = { { USB_DEVICE(0x13d8, 0x0020) }, { USB_DEVICE(0x13d8, 0x0021) }, { USB_DEVICE(0x13d8, 0x0022) }, { } }; MODULE_DEVICE_TABLE(usb, usbduxsigma_usb_table); static struct usb_driver usbduxsigma_usb_driver = { .name = "usbduxsigma", .probe = usbduxsigma_usb_probe, .disconnect = comedi_usb_auto_unconfig, .id_table = usbduxsigma_usb_table, }; module_comedi_usb_driver(usbduxsigma_driver, usbduxsigma_usb_driver); MODULE_AUTHOR("Bernd Porr, mail@berndporr.me.uk"); MODULE_DESCRIPTION("Stirling/ITL USB-DUX SIGMA -- mail@berndporr.me.uk"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE);
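The PWM path above maps a requested period in nanoseconds onto an 8-bit delay byte for the FX2 firmware: usbduxsigma_pwm_period() divides the period by the cost of one firmware loop, (6 * 512 * 1000 / 33) ns, subtracts a fixed offset of 6, and rejects anything that would not fit in the delay register. Below is a minimal standalone sketch of that conversion, assuming the same integer arithmetic; MIN_PWM_PERIOD is defined in a part of the driver not shown in this dump, so the value used here is a placeholder.

#include <stdio.h>

/* Placeholder assumption: the real MIN_PWM_PERIOD lives in driver code
 * not included in this dump. */
#define MIN_PWM_PERIOD 10000

/* Mirror of usbduxsigma_pwm_period(): one firmware loop consumes
 * (6 * 512 * 1000 / 33) ns of the period, and the FX2 delay register
 * is 8 bits wide, so overly long periods are rejected (the driver
 * returns -EAGAIN in both failure cases). */
static int pwm_period_to_fx2delay(unsigned int period, int *fx2delay)
{
	int delay;

	if (period < MIN_PWM_PERIOD)
		return -1;
	delay = (period / (6 * 512 * 1000 / 33)) - 6;
	if (delay > 255)
		return -1;
	*fx2delay = delay;
	return 0;
}

int main(void)
{
	static const unsigned int periods[] = { 1000000, 5000000, 30000000 };
	unsigned int i;
	int delay;

	for (i = 0; i < sizeof(periods) / sizeof(periods[0]); i++) {
		if (pwm_period_to_fx2delay(periods[i], &delay) == 0)
			printf("period %u ns -> fx2delay %d\n", periods[i], delay);
		else
			printf("period %u ns -> rejected\n", periods[i]);
	}
	return 0;
}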
// SPDX-License-Identifier: GPL-2.0 #include <linux/in.h> #include <linux/inet.h> #include <linux/list.h> #include <linux/module.h> #include <linux/net.h> #include <linux/proc_fs.h> #include <linux/rculist.h> #include <linux/seq_file.h> #include <linux/socket.h> #include <net/inet_sock.h> #include <net/kcm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/tcp.h> #ifdef CONFIG_PROC_FS static struct kcm_mux *kcm_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); return list_first_or_null_rcu(&knet->mux_list, struct kcm_mux, kcm_mux_list); } static struct kcm_mux *kcm_get_next(struct kcm_mux *mux) { struct kcm_net *knet = mux->knet; return list_next_or_null_rcu(&knet->mux_list, &mux->kcm_mux_list, struct kcm_mux, kcm_mux_list); } static struct kcm_mux *kcm_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); struct kcm_mux *m; list_for_each_entry_rcu(m, &knet->mux_list, kcm_mux_list) { if (!pos) return m; --pos; } return NULL; } static void *kcm_seq_next(struct seq_file *seq, void *v, loff_t *pos) { void *p; if (v == SEQ_START_TOKEN) p = kcm_get_first(seq); else p = kcm_get_next(v); ++*pos; return p; } static void *kcm_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; else return kcm_get_idx(seq, *pos - 1); } static void kcm_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { rcu_read_unlock(); } struct kcm_proc_mux_state { struct seq_net_private p; int idx; }; static void kcm_format_mux_header(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct kcm_net *knet = net_generic(net, kcm_net_id); seq_printf(seq, "*** KCM statistics (%d MUX) ****\n", knet->count); seq_printf(seq, "%-14s %-10s %-16s %-10s %-16s %-8s %-8s %-8s %-8s %s", "Object", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "Recv-Q", "Rmem", "Send-Q", "Smem", "Status"); /* XXX: pdsts header stuff here
*/ seq_puts(seq, "\n"); } static void kcm_format_sock(struct kcm_sock *kcm, struct seq_file *seq, int i, int *len) { seq_printf(seq, " kcm-%-7u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8s ", kcm->index, kcm->stats.rx_msgs, kcm->stats.rx_bytes, kcm->stats.tx_msgs, kcm->stats.tx_bytes, kcm->sk.sk_receive_queue.qlen, sk_rmem_alloc_get(&kcm->sk), kcm->sk.sk_write_queue.qlen, "-"); if (kcm->tx_psock) seq_printf(seq, "Psck-%u ", kcm->tx_psock->index); if (kcm->tx_wait) seq_puts(seq, "TxWait "); if (kcm->tx_wait_more) seq_puts(seq, "WMore "); if (kcm->rx_wait) seq_puts(seq, "RxWait "); seq_puts(seq, "\n"); } static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, int i, int *len) { seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, psock->strp.stats.msgs, psock->strp.stats.bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, atomic_read(&psock->sk->sk_rmem_alloc), psock->sk->sk_write_queue.qlen, refcount_read(&psock->sk->sk_wmem_alloc)); if (psock->done) seq_puts(seq, "Done "); if (psock->tx_stopped) seq_puts(seq, "TxStop "); if (psock->strp.stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); if (!psock->strp.paused && !psock->ready_rx_msg) { if (psock->sk->sk_receive_queue.qlen) { if (psock->strp.need_bytes) seq_printf(seq, "RxWait=%u ", psock->strp.need_bytes); else seq_printf(seq, "RxWait "); } } else { if (psock->strp.paused) seq_puts(seq, "RxPause "); if (psock->ready_rx_msg) seq_puts(seq, "RdyRx "); } seq_puts(seq, "\n"); } static void kcm_format_mux(struct kcm_mux *mux, loff_t idx, struct seq_file *seq) { int i, len; struct kcm_sock *kcm; struct kcm_psock *psock; /* mux information */ seq_printf(seq, "%-6s%-8s %-10llu %-16llu %-10llu %-16llu %-8s %-8s %-8s %-8s ", "mux", "", mux->stats.rx_msgs, mux->stats.rx_bytes, mux->stats.tx_msgs, mux->stats.tx_bytes, "-", "-", "-", "-"); seq_printf(seq, "KCMs: %d, Psocks %d\n", mux->kcm_socks_cnt, mux->psocks_cnt); /* kcm sock information */ i = 0; spin_lock_bh(&mux->lock); list_for_each_entry(kcm, &mux->kcm_socks, kcm_sock_list) { kcm_format_sock(kcm, seq, i, &len); i++; } i = 0; list_for_each_entry(psock, &mux->psocks, psock_list) { kcm_format_psock(psock, seq, i, &len); i++; } spin_unlock_bh(&mux->lock); } static int kcm_seq_show(struct seq_file *seq, void *v) { struct kcm_proc_mux_state *mux_state; mux_state = seq->private; if (v == SEQ_START_TOKEN) { mux_state->idx = 0; kcm_format_mux_header(seq); } else { kcm_format_mux(v, mux_state->idx, seq); mux_state->idx++; } return 0; } static const struct seq_operations kcm_seq_ops = { .show = kcm_seq_show, .start = kcm_seq_start, .next = kcm_seq_next, .stop = kcm_seq_stop, }; static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; struct kcm_net *knet = net_generic(net, kcm_net_id); memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); aggregate_strp_stats(&knet->aggregate_strp_stats, &strp_stats); list_for_each_entry(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); 
aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); aggregate_strp_stats(&mux->aggregate_strp_stats, &strp_stats); list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); save_strp_stats(&psock->strp, &strp_stats); } spin_unlock_bh(&mux->lock); } mutex_unlock(&knet->mutex); seq_printf(seq, "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s\n", "MUX", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "TX-Retries", "Attach", "Unattach", "UnattchRsvd", "RX-RdyDrops"); seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10u %-10u %-10u %-10u %-10u\n", "", mux_stats.rx_msgs, mux_stats.rx_bytes, mux_stats.tx_msgs, mux_stats.tx_bytes, mux_stats.tx_retries, mux_stats.psock_attach, mux_stats.psock_unattach_rsvd, mux_stats.psock_unattach, mux_stats.rx_ready_drops); seq_printf(seq, "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", "TX-Msgs", "TX-Bytes", "Reserved", "Unreserved", "RX-Aborts", "RX-Intr", "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", "RX-TooBig", "RX-Timeout", "TX-Aborts"); seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", strp_stats.msgs, strp_stats.bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, strp_stats.aborts, strp_stats.interrupted, strp_stats.unrecov_intr, strp_stats.mem_fail, strp_stats.need_more_hdr, strp_stats.bad_hdr_len, strp_stats.msg_too_big, strp_stats.msg_timeouts, psock_stats.tx_aborts); return 0; } static int kcm_proc_init_net(struct net *net) { if (!proc_create_net_single("kcm_stats", 0444, net->proc_net, kcm_stats_seq_show, NULL)) goto out_kcm_stats; if (!proc_create_net("kcm", 0444, net->proc_net, &kcm_seq_ops, sizeof(struct kcm_proc_mux_state))) goto out_kcm; return 0; out_kcm: remove_proc_entry("kcm_stats", net->proc_net); out_kcm_stats: return -ENOMEM; } static void kcm_proc_exit_net(struct net *net) { remove_proc_entry("kcm", net->proc_net); remove_proc_entry("kcm_stats", net->proc_net); } static struct pernet_operations kcm_net_ops = { .init = kcm_proc_init_net, .exit = kcm_proc_exit_net, }; int __init kcm_proc_init(void) { return register_pernet_subsys(&kcm_net_ops); } void __exit kcm_proc_exit(void) { unregister_pernet_subsys(&kcm_net_ops); } #endif /* CONFIG_PROC_FS */
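The /proc/net/kcm iterator above follows the standard seq_file cursor shape: kcm_seq_start() returns SEQ_START_TOKEN at position 0 so the header is printed exactly once, and every later position is re-resolved under rcu_read_lock() by walking the mux list and skipping entries, because the list may have changed between reads of the file. Here is a standalone userspace sketch of that positional re-lookup over a plain singly linked list; the names are illustrative, not kernel API.

#include <stdio.h>

struct mux {
	int index;
	struct mux *next;
};

/* Analogue of kcm_get_idx(): re-resolve an iterator position by
 * walking the list from the head and skipping `pos` entries.  The
 * kernel version does the same under rcu_read_lock(), so a position
 * can map to a different element if the list changed in between. */
static struct mux *get_idx(struct mux *head, long pos)
{
	struct mux *m;

	for (m = head; m; m = m->next) {
		if (!pos)
			return m;
		--pos;
	}
	return NULL;
}

int main(void)
{
	struct mux c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	long pos;

	/* pos 0 plays the role of SEQ_START_TOKEN (the header line);
	 * list entries start at pos - 1, as in kcm_seq_start(). */
	for (pos = 0; ; pos++) {
		if (pos == 0) {
			printf("header\n");
			continue;
		}
		struct mux *m = get_idx(&a, pos - 1);
		if (!m)
			break;
		printf("mux %d\n", m->index);
	}
	return 0;
}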
// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_connmark.c netfilter connmark retriever action * skb mark is over-written * * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org> */ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/pkt_cls.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_connmark.h> #include <net/tc_act/tc_connmark.h> #include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_connmark_ops; TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = to_connmark(a); struct tcf_connmark_parms *parms; struct nf_conntrack_zone zone; struct nf_conn *c; int proto; tcf_lastuse_update(&ca->tcf_tm); tcf_action_update_bstats(&ca->common, skb); parms = rcu_dereference_bh(ca->parms); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; break; case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; break; default: goto out; } c = nf_ct_get(skb, &ctinfo); if (c) { skb->mark = READ_ONCE(c->mark); goto count; } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net, &tuple)) goto out; zone.id = parms->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; thash = nf_conntrack_find_get(parms->net, &zone, &tuple); if (!thash) goto out; c = nf_ct_tuplehash_to_ctrack(thash); skb->mark = READ_ONCE(c->mark); nf_ct_put(c); count: /* using overlimits stats to count how many packets marked */ tcf_action_inc_overlimit_qstats(&ca->common); out: return parms->action; } static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, }; static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); struct tcf_connmark_parms *nparms, *oparms; struct nlattr
*tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; int ret = 0, err; u32 index; if (!nla) return -EINVAL; ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla, connmark_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_CONNMARK_PARMS]) return -EINVAL; nparms = kzalloc(sizeof(*nparms), GFP_KERNEL); if (!nparms) return -ENOMEM; parm = nla_data(tb[TCA_CONNMARK_PARMS]); index = parm->index; ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { ret = tcf_idr_create_from_flags(tn, index, est, a, &act_connmark_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); err = ret; goto out_free; } ci = to_connmark(*a); nparms->net = net; nparms->zone = parm->zone; ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); if (bind) { err = ACT_P_BOUND; goto out_free; } if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } nparms->net = rtnl_dereference(ci->parms)->net; nparms->zone = parm->zone; ret = 0; } else { err = ret; goto out_free; } err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; nparms->action = parm->action; spin_lock_bh(&ci->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock)); spin_unlock_bh(&ci->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); if (oparms) kfree_rcu(oparms, rcu); return ret; release_idr: tcf_idr_release(*a, bind); out_free: kfree(nparms); return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { const struct tcf_connmark_info *ci = to_connmark(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_connmark_parms *parms; struct tc_connmark opt; struct tcf_t t; memset(&opt, 0, sizeof(opt)); opt.index = ci->tcf_index; opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; rcu_read_lock(); parms = rcu_dereference(ci->parms); opt.action = parms->action; opt.zone = parms->zone; if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &ci->tcf_tm); if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, TCA_CONNMARK_PAD)) goto nla_put_failure; rcu_read_unlock(); return skb->len; nla_put_failure: rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } static void tcf_connmark_cleanup(struct tc_action *a) { struct tcf_connmark_info *ci = to_connmark(a); struct tcf_connmark_parms *parms; parms = rcu_dereference_protected(ci->parms, 1); if (parms) kfree_rcu(parms, rcu); } static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, .cleanup = tcf_connmark_cleanup, .size = sizeof(struct tcf_connmark_info), }; MODULE_ALIAS_NET_ACT("connmark"); static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_connmark_ops.net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, .exit_batch = connmark_exit_net, .id = &act_connmark_ops.net_id, .size = sizeof(struct tc_action_net), }; static int __init connmark_init_module(void) { return 
tcf_register_action(&act_connmark_ops, &connmark_net_ops); } static void __exit connmark_cleanup_module(void) { tcf_unregister_action(&act_connmark_ops, &connmark_net_ops); } module_init(connmark_init_module); module_exit(connmark_cleanup_module); MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); MODULE_DESCRIPTION("Connection tracking mark restoring"); MODULE_LICENSE("GPL");
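tcf_connmark_init() never edits the live tcf_connmark_parms in place: it fills in a fresh nparms, publishes it with rcu_replace_pointer() while holding tcf_lock, and hands the old copy to kfree_rcu() so readers in tcf_connmark_act() can finish with it. Below is a standalone sketch of that publish-then-retire shape, using a C11 atomic pointer swap as a stand-in for rcu_replace_pointer(); the immediate free is only safe here because this toy has no concurrent readers, whereas the kernel defers the free across an RCU grace period.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct parms {
	int zone;
	int action;
};

static _Atomic(struct parms *) live_parms;

/* Analogue of the action's update path: build a complete new copy,
 * publish it with one atomic swap, then retire the old copy.  In the
 * kernel the swap is rcu_replace_pointer() under tcf_lock and the
 * free is deferred via kfree_rcu() until all readers have left. */
static int replace_parms(int zone, int action)
{
	struct parms *nparms, *oparms;

	nparms = malloc(sizeof(*nparms));
	if (!nparms)
		return -1;
	nparms->zone = zone;
	nparms->action = action;

	oparms = atomic_exchange(&live_parms, nparms);
	free(oparms);		/* kernel: kfree_rcu(oparms, rcu) */
	return 0;
}

int main(void)
{
	replace_parms(0, 1);
	replace_parms(7, 3);	/* TCA_ACT_FLAGS_REPLACE-style update */

	struct parms *p = atomic_load(&live_parms);
	printf("zone=%d action=%d\n", p->zone, p->action);
	free(p);
	return 0;
}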
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM vmscan #if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_VMSCAN_H #include <linux/types.h> #include <linux/tracepoint.h> #include <linux/mm.h> #include <linux/memcontrol.h> #include <trace/events/mmflags.h> #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u #define RECLAIM_WB_MIXED 0x0010u #define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u #define RECLAIM_WB_LRU (RECLAIM_WB_ANON|RECLAIM_WB_FILE) #define show_reclaim_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {RECLAIM_WB_ANON, "RECLAIM_WB_ANON"}, \ {RECLAIM_WB_FILE, "RECLAIM_WB_FILE"}, \ {RECLAIM_WB_MIXED, "RECLAIM_WB_MIXED"}, \ {RECLAIM_WB_SYNC, "RECLAIM_WB_SYNC"}, \ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) #define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) #define _VMSCAN_THROTTLE_CONGESTED (1 << VMSCAN_THROTTLE_CONGESTED) #define show_throttle_flags(flags) \ (flags) ?
__print_flags(flags, "|", \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"}, \ {_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \ ) : "VMSCAN_THROTTLE_NONE" #define trace_reclaim_flags(file) ( \ (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ ) TRACE_EVENT(mm_vmscan_kswapd_sleep, TP_PROTO(int nid), TP_ARGS(nid), TP_STRUCT__entry( __field( int, nid ) ), TP_fast_assign( __entry->nid = nid; ), TP_printk("nid=%d", __entry->nid) ); TRACE_EVENT(mm_vmscan_kswapd_wake, TP_PROTO(int nid, int zid, int order), TP_ARGS(nid, zid, order), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; ), TP_printk("nid=%d order=%d", __entry->nid, __entry->order) ); TRACE_EVENT(mm_vmscan_wakeup_kswapd, TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags), TP_ARGS(nid, zid, order, gfp_flags), TP_STRUCT__entry( __field( int, nid ) __field( int, zid ) __field( int, order ) __field( unsigned long, gfp_flags ) ), TP_fast_assign( __entry->nid = nid; __entry->zid = zid; __entry->order = order; __entry->gfp_flags = (__force unsigned long)gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags), TP_STRUCT__entry( __field( int, order ) __field( unsigned long, gfp_flags ) ), TP_fast_assign( __entry->order = order; __entry->gfp_flags = (__force unsigned long)gfp_flags; ), TP_printk("order=%d gfp_flags=%s", __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin, TP_PROTO(int order, gfp_t gfp_flags), TP_ARGS(order, gfp_flags) ); #endif /* CONFIG_MEMCG */ DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed), TP_STRUCT__entry( __field( unsigned long, nr_reclaimed ) ), TP_fast_assign( __entry->nr_reclaimed = nr_reclaimed; ), TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #ifdef CONFIG_MEMCG DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); #endif /* CONFIG_MEMCG */ TRACE_EVENT(mm_shrink_slab_start, TP_PROTO(struct shrinker *shr, struct shrink_control *sc, long nr_objects_to_shrink, unsigned long cache_items, unsigned long long delta, unsigned long total_scan, int priority), TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan, priority), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(void *, shrink) __field(int, nid) __field(long, nr_objects_to_shrink) __field(unsigned long, 
gfp_flags) __field(unsigned long, cache_items) __field(unsigned long long, delta) __field(unsigned long, total_scan) __field(int, priority) ), TP_fast_assign( __entry->shr = shr; __entry->shrink = shr->scan_objects; __entry->nid = sc->nid; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = (__force unsigned long)sc->gfp_mask; __entry->cache_items = cache_items; __entry->delta = delta; __entry->total_scan = total_scan; __entry->priority = priority; ), TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", __entry->shrink, __entry->shr, __entry->nid, __entry->nr_objects_to_shrink, show_gfp_flags(__entry->gfp_flags), __entry->cache_items, __entry->delta, __entry->total_scan, __entry->priority) ); TRACE_EVENT(mm_shrink_slab_end, TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval, long unused_scan_cnt, long new_scan_cnt, long total_scan), TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt, total_scan), TP_STRUCT__entry( __field(struct shrinker *, shr) __field(int, nid) __field(void *, shrink) __field(long, unused_scan) __field(long, new_scan) __field(int, retval) __field(long, total_scan) ), TP_fast_assign( __entry->shr = shr; __entry->nid = nid; __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; __entry->total_scan = total_scan; ), TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", __entry->shrink, __entry->shr, __entry->nid, __entry->unused_scan, __entry->new_scan, __entry->total_scan, __entry->retval) ); TRACE_EVENT(mm_vmscan_lru_isolate, TP_PROTO(int highest_zoneidx, int order, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_skipped, unsigned long nr_taken, int lru), TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru), TP_STRUCT__entry( __field(int, highest_zoneidx) __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) __field(int, lru) ), TP_fast_assign( __entry->highest_zoneidx = highest_zoneidx; __entry->order = order; __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; __entry->lru = lru; ), /* * classzone is previous name of the highest_zoneidx. * Reason not to change it is the ABI requirement of the tracepoint. 
*/ TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->highest_zoneidx, __entry->order, __entry->nr_requested, __entry->nr_scanned, __entry->nr_skipped, __entry->nr_taken, __print_symbolic(__entry->lru, LRU_NAMES)) ); TRACE_EVENT(mm_vmscan_write_folio, TP_PROTO(struct folio *folio), TP_ARGS(folio), TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, reclaim_flags) ), TP_fast_assign( __entry->pfn = folio_pfn(folio); __entry->reclaim_flags = trace_reclaim_flags( folio_is_file_lru(folio)); ), TP_printk("page=%p pfn=0x%lx flags=%s", pfn_to_page(__entry->pfn), __entry->pfn, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_reclaim_pages, TP_PROTO(int nid, unsigned long nr_scanned, unsigned long nr_reclaimed, struct reclaim_stat *stat), TP_ARGS(nid, nr_scanned, nr_reclaimed, stat), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_scanned) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, nr_congested) __field(unsigned long, nr_immediate) __field(unsigned int, nr_activate0) __field(unsigned int, nr_activate1) __field(unsigned long, nr_ref_keep) __field(unsigned long, nr_unmap_fail) ), TP_fast_assign( __entry->nid = nid; __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_dirty = stat->nr_dirty; __entry->nr_writeback = stat->nr_writeback; __entry->nr_congested = stat->nr_congested; __entry->nr_immediate = stat->nr_immediate; __entry->nr_activate0 = stat->nr_activate[0]; __entry->nr_activate1 = stat->nr_activate[1]; __entry->nr_ref_keep = stat->nr_ref_keep; __entry->nr_unmap_fail = stat->nr_unmap_fail; ), TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld", __entry->nid, __entry->nr_scanned, __entry->nr_reclaimed, __entry->nr_dirty, __entry->nr_writeback, __entry->nr_congested, __entry->nr_immediate, __entry->nr_activate0, __entry->nr_activate1, __entry->nr_ref_keep, __entry->nr_unmap_fail) ); TRACE_EVENT(mm_vmscan_lru_shrink_inactive, TP_PROTO(int nid, unsigned long nr_scanned, unsigned long nr_reclaimed, struct reclaim_stat *stat, int priority, int file), TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_scanned) __field(unsigned long, nr_reclaimed) __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, nr_congested) __field(unsigned long, nr_immediate) __field(unsigned int, nr_activate0) __field(unsigned int, nr_activate1) __field(unsigned long, nr_ref_keep) __field(unsigned long, nr_unmap_fail) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_scanned = nr_scanned; __entry->nr_reclaimed = nr_reclaimed; __entry->nr_dirty = stat->nr_dirty; __entry->nr_writeback = stat->nr_writeback; __entry->nr_congested = stat->nr_congested; __entry->nr_immediate = stat->nr_immediate; __entry->nr_activate0 = stat->nr_activate[0]; __entry->nr_activate1 = stat->nr_activate[1]; __entry->nr_ref_keep = stat->nr_ref_keep; __entry->nr_unmap_fail = stat->nr_unmap_fail; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld 
nr_unmap_fail=%ld priority=%d flags=%s", __entry->nid, __entry->nr_scanned, __entry->nr_reclaimed, __entry->nr_dirty, __entry->nr_writeback, __entry->nr_congested, __entry->nr_immediate, __entry->nr_activate0, __entry->nr_activate1, __entry->nr_ref_keep, __entry->nr_unmap_fail, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_lru_shrink_active, TP_PROTO(int nid, unsigned long nr_taken, unsigned long nr_active, unsigned long nr_deactivated, unsigned long nr_referenced, int priority, int file), TP_ARGS(nid, nr_taken, nr_active, nr_deactivated, nr_referenced, priority, file), TP_STRUCT__entry( __field(int, nid) __field(unsigned long, nr_taken) __field(unsigned long, nr_active) __field(unsigned long, nr_deactivated) __field(unsigned long, nr_referenced) __field(int, priority) __field(int, reclaim_flags) ), TP_fast_assign( __entry->nid = nid; __entry->nr_taken = nr_taken; __entry->nr_active = nr_active; __entry->nr_deactivated = nr_deactivated; __entry->nr_referenced = nr_referenced; __entry->priority = priority; __entry->reclaim_flags = trace_reclaim_flags(file); ), TP_printk("nid=%d nr_taken=%ld nr_active=%ld nr_deactivated=%ld nr_referenced=%ld priority=%d flags=%s", __entry->nid, __entry->nr_taken, __entry->nr_active, __entry->nr_deactivated, __entry->nr_referenced, __entry->priority, show_reclaim_flags(__entry->reclaim_flags)) ); TRACE_EVENT(mm_vmscan_node_reclaim_begin, TP_PROTO(int nid, int order, gfp_t gfp_flags), TP_ARGS(nid, order, gfp_flags), TP_STRUCT__entry( __field(int, nid) __field(int, order) __field(unsigned long, gfp_flags) ), TP_fast_assign( __entry->nid = nid; __entry->order = order; __entry->gfp_flags = (__force unsigned long)gfp_flags; ), TP_printk("nid=%d order=%d gfp_flags=%s", __entry->nid, __entry->order, show_gfp_flags(__entry->gfp_flags)) ); DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, TP_PROTO(unsigned long nr_reclaimed), TP_ARGS(nr_reclaimed) ); TRACE_EVENT(mm_vmscan_throttled, TP_PROTO(int nid, int usec_timeout, int usec_delayed, int reason), TP_ARGS(nid, usec_timeout, usec_delayed, reason), TP_STRUCT__entry( __field(int, nid) __field(int, usec_timeout) __field(int, usec_delayed) __field(int, reason) ), TP_fast_assign( __entry->nid = nid; __entry->usec_timeout = usec_timeout; __entry->usec_delayed = usec_delayed; __entry->reason = 1U << reason; ), TP_printk("nid=%d usec_timeout=%d usect_delayed=%d reason=%s", __entry->nid, __entry->usec_timeout, __entry->usec_delayed, show_throttle_flags(__entry->reason)) ); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
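/*
 * Editor's sketch (not part of the original header): attaching a probe to one
 * of the tracepoints declared above from kernel code. This assumes the usual
 * tracepoint probe convention (a leading 'void *data' argument followed by
 * the TP_PROTO arguments) and that the tracepoint is reachable from the
 * calling context (modules can only attach if the tracepoint is exported;
 * built-in code can always attach). All 'vmscan_probe_*' names are
 * hypothetical.
 */
#include <linux/module.h>
#include <trace/events/vmscan.h>

static void probe_throttled(void *data, int nid, int usec_timeout,
			    int usec_delayed, int reason)
{
	pr_info("vmscan throttled: nid=%d delayed=%dus\n", nid, usec_delayed);
}

static int __init vmscan_probe_init(void)
{
	/* register_trace_<name>() is generated by the TRACE_EVENT() above */
	return register_trace_mm_vmscan_throttled(probe_throttled, NULL);
}

static void __exit vmscan_probe_exit(void)
{
	unregister_trace_mm_vmscan_throttled(probe_throttled, NULL);
	/* wait for in-flight probes to finish before the code goes away */
	tracepoint_synchronize_unregister();
}

module_init(vmscan_probe_init);
module_exit(vmscan_probe_exit);
MODULE_LICENSE("GPL");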
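/*
 * Editor's sketch (not from the original file): the same events can also be
 * consumed from userspace through tracefs. A minimal reader, assuming tracefs
 * is mounted at /sys/kernel/tracing and the program runs with sufficient
 * privileges; the event directory name follows the "vmscan" trace system the
 * events above belong to.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/tracing/events/vmscan/mm_vmscan_throttled/enable";
	char buf[4096];
	ssize_t n;
	int fd = open(enable, O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enable event");
		return 1;
	}
	close(fd);

	/* trace_pipe blocks until events arrive, then drains them */
	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0) {
		perror("trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}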
/* * videobuf2-core.h - Video Buffer 2 Core Framework * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. */ #ifndef _MEDIA_VIDEOBUF2_CORE_H #define _MEDIA_VIDEOBUF2_CORE_H #include <linux/mm_types.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/dma-buf.h> #include <linux/bitops.h> #include <media/media-request.h> #include <media/frame_vector.h> #define VB2_MAX_FRAME (32) #define VB2_MAX_PLANES (8) /** * enum vb2_memory - type of memory model used to make the buffers visible * to userspace. * * @VB2_MEMORY_UNKNOWN: Buffer status is unknown or it is not used yet by * userspace. * @VB2_MEMORY_MMAP: The buffers are allocated by the kernel and memory * mapped via the mmap() ioctl. This model is * also used when the user is using the buffers via * read() or write() system calls. * @VB2_MEMORY_USERPTR: The buffers were allocated in userspace and are * memory mapped via the mmap() ioctl. * @VB2_MEMORY_DMABUF: The buffers are passed to userspace via DMA buffer. */ enum vb2_memory { VB2_MEMORY_UNKNOWN = 0, VB2_MEMORY_MMAP = 1, VB2_MEMORY_USERPTR = 2, VB2_MEMORY_DMABUF = 4, }; struct vb2_fileio_data; struct vb2_threadio_data; struct vb2_buffer; /** * struct vb2_mem_ops - memory handling/memory allocator operations.
* @alloc: allocate video memory and, optionally, allocator private data, * return ERR_PTR() on failure or a pointer to allocator private, * per-buffer data on success; the returned private structure * will then be passed as @buf_priv argument to other ops in this * structure. The size argument to this function shall be * *page aligned*. * @put: inform the allocator that the buffer will no longer be used; * usually will result in the allocator freeing the buffer (if * no other users of this buffer are present); the @buf_priv * argument is the allocator private per-buffer structure * previously returned from the alloc callback. * @get_dmabuf: acquire userspace memory for a hardware operation; used for * DMABUF memory types. * @get_userptr: acquire userspace memory for a hardware operation; used for * USERPTR memory types; vaddr is the address passed to the * videobuf2 layer when queuing a video buffer of USERPTR type; * should return an allocator private per-buffer structure * associated with the buffer on success, ERR_PTR() on failure; * the returned private structure will then be passed as @buf_priv * argument to other ops in this structure. * @put_userptr: inform the allocator that a USERPTR buffer will no longer * be used. * @prepare: called every time the buffer is passed from userspace to the * driver, useful for cache synchronisation, optional. * @finish: called every time the buffer is passed back from the driver * to userspace, also optional. * @attach_dmabuf: attach a shared &struct dma_buf for a hardware operation; * used for DMABUF memory types; dev is the alloc device and * dbuf is the shared dma_buf; returns ERR_PTR() on failure or the * allocator private per-buffer structure on success; the latter * needs to be used for further accesses to the buffer. * @detach_dmabuf: inform the exporter of the buffer that the current DMABUF * buffer is no longer used; the @buf_priv argument is the * allocator private per-buffer structure previously returned * from the attach_dmabuf callback. * @map_dmabuf: request access to the dmabuf from the allocator; the allocator * of the dmabuf is informed that this driver is going to use the * dmabuf. * @unmap_dmabuf: releases access to the dmabuf; the allocator is notified * that this driver is done using the dmabuf for now. * @vaddr: return a kernel virtual address to a given memory buffer * associated with the passed private structure or NULL if no * such mapping exists. * @cookie: return allocator specific cookie for a given memory buffer * associated with the passed private structure or NULL if not * available. * @num_users: return the current number of users of a memory buffer; * return 1 if the videobuf2 layer (or actually the driver using * it) is the only user. * @mmap: setup a userspace mapping for a given memory buffer under * the provided virtual memory region. * * Those operations are used by the videobuf2 core to implement the memory * handling/memory allocators for each type of supported streaming I/O method. * * .. note:: * #) Required ops for USERPTR types: get_userptr, put_userptr. * * #) Required ops for MMAP types: alloc, put, num_users, mmap. * * #) Required ops for read/write access types: alloc, put, num_users, vaddr. * * #) Required ops for DMABUF types: attach_dmabuf, detach_dmabuf, * map_dmabuf, unmap_dmabuf.
*/ struct vb2_mem_ops { void *(*alloc)(struct vb2_buffer *vb, struct device *dev, unsigned long size); void (*put)(void *buf_priv); struct dma_buf *(*get_dmabuf)(struct vb2_buffer *vb, void *buf_priv, unsigned long flags); void *(*get_userptr)(struct vb2_buffer *vb, struct device *dev, unsigned long vaddr, unsigned long size); void (*put_userptr)(void *buf_priv); void (*prepare)(void *buf_priv); void (*finish)(void *buf_priv); void *(*attach_dmabuf)(struct vb2_buffer *vb, struct device *dev, struct dma_buf *dbuf, unsigned long size); void (*detach_dmabuf)(void *buf_priv); int (*map_dmabuf)(void *buf_priv); void (*unmap_dmabuf)(void *buf_priv); void *(*vaddr)(struct vb2_buffer *vb, void *buf_priv); void *(*cookie)(struct vb2_buffer *vb, void *buf_priv); unsigned int (*num_users)(void *buf_priv); int (*mmap)(void *buf_priv, struct vm_area_struct *vma); }; /** * struct vb2_plane - plane information. * @mem_priv: private data with this plane. * @dbuf: dma_buf - shared buffer object. * @dbuf_mapped: flag to show whether dbuf is mapped or not * @dbuf_duplicated: boolean to show whether dbuf is duplicated with a * previous plane of the buffer. * @bytesused: number of bytes occupied by data in the plane (payload). * @length: size of this plane (NOT the payload) in bytes. The maximum * valid size is MAX_UINT - PAGE_SIZE. * @min_length: minimum required size of this plane (NOT the payload) in bytes. * @length is always greater or equal to @min_length, and like * @length, it is limited to MAX_UINT - PAGE_SIZE. * @m: Union with memtype-specific data. * @m.offset: when memory in the associated struct vb2_buffer is * %VB2_MEMORY_MMAP, equals the offset from the start of * the device memory for this plane (or is a "cookie" that * should be passed to mmap() called on the video node). * @m.userptr: when memory is %VB2_MEMORY_USERPTR, a userspace pointer * pointing to this plane. * @m.fd: when memory is %VB2_MEMORY_DMABUF, a userspace file * descriptor associated with this plane. * @data_offset: offset in the plane to the start of data; usually 0, * unless there is a header in front of the data. * * Should contain enough information to be able to cover all the fields * of &struct v4l2_plane at videodev2.h. */ struct vb2_plane { void *mem_priv; struct dma_buf *dbuf; unsigned int dbuf_mapped; bool dbuf_duplicated; unsigned int bytesused; unsigned int length; unsigned int min_length; union { unsigned int offset; unsigned long userptr; int fd; } m; unsigned int data_offset; }; /** * enum vb2_io_modes - queue access methods. * @VB2_MMAP: driver supports MMAP with streaming API. * @VB2_USERPTR: driver supports USERPTR with streaming API. * @VB2_READ: driver supports read() style access. * @VB2_WRITE: driver supports write() style access. * @VB2_DMABUF: driver supports DMABUF with streaming API. */ enum vb2_io_modes { VB2_MMAP = BIT(0), VB2_USERPTR = BIT(1), VB2_READ = BIT(2), VB2_WRITE = BIT(3), VB2_DMABUF = BIT(4), }; /** * enum vb2_buffer_state - current video buffer state. * @VB2_BUF_STATE_DEQUEUED: buffer under userspace control. * @VB2_BUF_STATE_IN_REQUEST: buffer is queued in media request. * @VB2_BUF_STATE_PREPARING: buffer is being prepared in videobuf2. * @VB2_BUF_STATE_QUEUED: buffer queued in videobuf2, but not in driver. * @VB2_BUF_STATE_ACTIVE: buffer queued in driver and possibly used * in a hardware operation. * @VB2_BUF_STATE_DONE: buffer returned from driver to videobuf2, but * not yet dequeued to userspace. 
* @VB2_BUF_STATE_ERROR: same as above, but the operation on the buffer * has ended with an error, which will be reported * to the userspace when it is dequeued. */ enum vb2_buffer_state { VB2_BUF_STATE_DEQUEUED, VB2_BUF_STATE_IN_REQUEST, VB2_BUF_STATE_PREPARING, VB2_BUF_STATE_QUEUED, VB2_BUF_STATE_ACTIVE, VB2_BUF_STATE_DONE, VB2_BUF_STATE_ERROR, }; struct vb2_queue; /** * struct vb2_buffer - represents a video buffer. * @vb2_queue: pointer to &struct vb2_queue with the queue to * which this buffer belongs. * @index: id number of the buffer. * @type: buffer type. * @memory: the method in which the actual data is passed. * @num_planes: number of planes in the buffer. * @timestamp: frame timestamp in ns. * @request: the request this buffer is associated with. * @req_obj: used to bind this buffer to a request. This * request object has a refcount. */ struct vb2_buffer { struct vb2_queue *vb2_queue; unsigned int index; unsigned int type; unsigned int memory; unsigned int num_planes; u64 timestamp; struct media_request *request; struct media_request_object req_obj; /* private: internal use only * * state: current buffer state; do not change * synced: this buffer has been synced for DMA, i.e. the * 'prepare' memop was called. It is cleared again * after the 'finish' memop is called. * prepared: this buffer has been prepared, i.e. the * buf_prepare op was called. It is cleared again * after the 'buf_finish' op is called. * copied_timestamp: the timestamp of this capture buffer was copied * from an output buffer. * skip_cache_sync_on_prepare: when set buffer's ->prepare() function * skips cache sync/invalidation. * skip_cache_sync_on_finish: when set buffer's ->finish() function * skips cache sync/invalidation. * planes: per-plane information; do not change * queued_entry: entry on the queued buffers list, which holds * all buffers queued from userspace * done_entry: entry on the list that stores all buffers ready * to be dequeued to userspace */ enum vb2_buffer_state state; unsigned int synced:1; unsigned int prepared:1; unsigned int copied_timestamp:1; unsigned int skip_cache_sync_on_prepare:1; unsigned int skip_cache_sync_on_finish:1; struct vb2_plane planes[VB2_MAX_PLANES]; struct list_head queued_entry; struct list_head done_entry; #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Counters for how often these buffer-related ops are * called. Used to check for unbalanced ops. */ u32 cnt_mem_alloc; u32 cnt_mem_put; u32 cnt_mem_get_dmabuf; u32 cnt_mem_get_userptr; u32 cnt_mem_put_userptr; u32 cnt_mem_prepare; u32 cnt_mem_finish; u32 cnt_mem_attach_dmabuf; u32 cnt_mem_detach_dmabuf; u32 cnt_mem_map_dmabuf; u32 cnt_mem_unmap_dmabuf; u32 cnt_mem_vaddr; u32 cnt_mem_cookie; u32 cnt_mem_num_users; u32 cnt_mem_mmap; u32 cnt_buf_out_validate; u32 cnt_buf_init; u32 cnt_buf_prepare; u32 cnt_buf_finish; u32 cnt_buf_cleanup; u32 cnt_buf_queue; u32 cnt_buf_request_complete; /* This counts the number of calls to vb2_buffer_done() */ u32 cnt_buf_done; #endif }; /** * struct vb2_ops - driver-specific callbacks. * * These operations are not called from interrupt context except where * mentioned specifically. * * @queue_setup: called from VIDIOC_REQBUFS() and VIDIOC_CREATE_BUFS() * handlers before memory allocation. It can be called * twice: if the original number of requested buffers * could not be allocated, then it will be called a * second time with the actually allocated number of * buffers to verify if that is OK.
* The driver should return the required number of buffers * in \*num_buffers, the required number of planes per * buffer in \*num_planes, the size of each plane should be * set in the sizes\[\] array and optional per-plane * allocator specific device in the alloc_devs\[\] array. * When called from VIDIOC_REQBUFS(), \*num_planes == 0, * the driver has to use the currently configured format to * determine the plane sizes and \*num_buffers is the total * number of buffers that are being allocated. When called * from VIDIOC_CREATE_BUFS(), \*num_planes != 0 and it * describes the requested number of planes and sizes\[\] * contains the requested plane sizes. In this case * \*num_buffers are being allocated additionally to * the buffers already allocated. If either \*num_planes * or the requested sizes are invalid callback must return %-EINVAL. * @buf_out_validate: called when the output buffer is prepared or queued * to a request; drivers can use this to validate * userspace-provided information; this is required only * for OUTPUT queues. * @buf_init: called once after allocating a buffer (in MMAP case) * or after acquiring a new USERPTR buffer; drivers may * perform additional buffer-related initialization; * initialization failure (return != 0) will prevent * queue setup from completing successfully; optional. * @buf_prepare: called every time the buffer is queued from userspace * and from the VIDIOC_PREPARE_BUF() ioctl; drivers may * perform any initialization required before each * hardware operation in this callback; drivers can * access/modify the buffer here as it is still synced for * the CPU; drivers that support VIDIOC_CREATE_BUFS() must * also validate the buffer size; if an error is returned, * the buffer will not be queued in driver; optional. * @buf_finish: called before every dequeue of the buffer back to * userspace; the buffer is synced for the CPU, so drivers * can access/modify the buffer contents; drivers may * perform any operations required before userspace * accesses the buffer; optional. The buffer state can be * one of the following: %DONE and %ERROR occur while * streaming is in progress, and the %PREPARED state occurs * when the queue has been canceled and all pending * buffers are being returned to their default %DEQUEUED * state. Typically you only have to do something if the * state is %VB2_BUF_STATE_DONE, since in all other cases * the buffer contents will be ignored anyway. * @buf_cleanup: called once before the buffer is freed; drivers may * perform any additional cleanup; optional. * @prepare_streaming: called once to prepare for 'streaming' state; this is * where validation can be done to verify everything is * okay and streaming resources can be claimed. It is * called when the VIDIOC_STREAMON ioctl is called. The * actual streaming starts when @start_streaming is called. * Optional. * @start_streaming: called once to enter 'streaming' state; the driver may * receive buffers with @buf_queue callback * before @start_streaming is called; the driver gets the * number of already queued buffers in count parameter; * driver can return an error if hardware fails, in that * case all buffers that have been already given by * the @buf_queue callback are to be returned by the driver * by calling vb2_buffer_done() with %VB2_BUF_STATE_QUEUED. * If you need a minimum number of buffers before you can * start streaming, then set * &vb2_queue->min_queued_buffers. 
If that is non-zero * then @start_streaming won't be called until at least * that many buffers have been queued up by userspace. * @stop_streaming: called when 'streaming' state must be disabled; driver * should stop any DMA transactions or wait until they * finish and give back all buffers it got from &buf_queue * callback by calling vb2_buffer_done() with either * %VB2_BUF_STATE_DONE or %VB2_BUF_STATE_ERROR; may use the * vb2_wait_for_all_buffers() function * @unprepare_streaming: called as the counterpart to @prepare_streaming; any * claimed streaming resources can be released here. It is * called when the VIDIOC_STREAMOFF ioctl is called or * when the streaming filehandle is closed. Optional. * @buf_queue: passes buffer vb to the driver; driver may start * hardware operation on this buffer; driver should give * the buffer back by calling the vb2_buffer_done() function; * it is always called after the VIDIOC_STREAMON() * ioctl; might be called before the @start_streaming callback * if the user pre-queued buffers before calling * VIDIOC_STREAMON(). * @buf_request_complete: called when a buffer that was never queued to the * driver, but is associated with a queued request, was * canceled. The driver will have to mark associated objects * in the request as completed; required if requests are * supported. */ struct vb2_ops { int (*queue_setup)(struct vb2_queue *q, unsigned int *num_buffers, unsigned int *num_planes, unsigned int sizes[], struct device *alloc_devs[]); int (*buf_out_validate)(struct vb2_buffer *vb); int (*buf_init)(struct vb2_buffer *vb); int (*buf_prepare)(struct vb2_buffer *vb); void (*buf_finish)(struct vb2_buffer *vb); void (*buf_cleanup)(struct vb2_buffer *vb); int (*prepare_streaming)(struct vb2_queue *q); int (*start_streaming)(struct vb2_queue *q, unsigned int count); void (*stop_streaming)(struct vb2_queue *q); void (*unprepare_streaming)(struct vb2_queue *q); void (*buf_queue)(struct vb2_buffer *vb); void (*buf_request_complete)(struct vb2_buffer *vb); }; /** * struct vb2_buf_ops - driver-specific callbacks. * * @verify_planes_array: Verify that a given user space structure contains * enough planes for the buffer. This is called * for each dequeued buffer. * @init_buffer: given a &vb2_buffer initialize the extra data after * struct vb2_buffer. * For V4L2 this is a &struct vb2_v4l2_buffer. * @fill_user_buffer: given a &vb2_buffer fill in the userspace structure. * For V4L2 this is a &struct v4l2_buffer. * @fill_vb2_buffer: given a userspace structure, fill in the &vb2_buffer. * If the userspace structure is invalid, then this op * will return an error. * @copy_timestamp: copy the timestamp from a userspace structure to * the &struct vb2_buffer. */ struct vb2_buf_ops { int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb); void (*init_buffer)(struct vb2_buffer *vb); void (*fill_user_buffer)(struct vb2_buffer *vb, void *pb); int (*fill_vb2_buffer)(struct vb2_buffer *vb, struct vb2_plane *planes); void (*copy_timestamp)(struct vb2_buffer *vb, const void *pb); }; /** * struct vb2_queue - a videobuf2 queue. * * @type: private buffer type whose content is defined by the vb2-core * caller. For example, for V4L2, it should match * the types defined on &enum v4l2_buf_type. * @io_modes: supported io methods (see &enum vb2_io_modes). * @dev: device to use for the default allocation context if the driver * doesn't fill in the @alloc_devs array. * @dma_attrs: DMA attributes to use for the DMA.
* @bidirectional: when this flag is set the DMA direction for the buffers of * this queue will be overridden with %DMA_BIDIRECTIONAL direction. * This is useful in cases where the hardware (firmware) writes to * a buffer which is mapped as read (%DMA_TO_DEVICE), or reads from * a buffer which is mapped for write (%DMA_FROM_DEVICE) in order * to satisfy some internal hardware restrictions or adds padding * needed by the processing algorithm. If the DMA mapping is * not bidirectional but the hardware (firmware) tries to access * the buffer in the opposite direction, this could lead to * IOMMU protection faults. * @fileio_read_once: report EOF after reading the first buffer * @fileio_write_immediately: queue buffer after each write() call * @allow_zero_bytesused: allow bytesused == 0 to be passed to the driver * @quirk_poll_must_check_waiting_for_buffers: Return %EPOLLERR at poll when QBUF * has not been called. This is a vb1 idiom that has been adopted * also by vb2. * @supports_requests: this queue supports the Request API. * @requires_requests: this queue requires the Request API. If this is set to 1, * then supports_requests must be set to 1 as well. * @uses_qbuf: qbuf was used directly for this queue. Set to 1 the first * time this is called. Set to 0 when the queue is canceled. * If this is 1, then you cannot queue buffers from a request. * @uses_requests: requests are used for this queue. Set to 1 the first time * a request is queued. Set to 0 when the queue is canceled. * If this is 1, then you cannot queue buffers directly. * @allow_cache_hints: when set user-space can pass cache management hints in * order to skip cache flush/invalidation on ->prepare() or/and * ->finish(). * @non_coherent_mem: when set queue will attempt to allocate buffers using * non-coherent memory. * @lock: pointer to a mutex that protects the &struct vb2_queue. The * driver must set this to a mutex to let the v4l2 core serialize * the queuing ioctls. This lock is used when waiting for a new * buffer to arrive: the lock is released, we wait for the new * buffer, and then the lock is retaken. * @owner: The filehandle that 'owns' the buffers, i.e. the filehandle * that called reqbufs, create_buffers or started fileio. * This field is not used by the videobuf2 core API, but it allows * drivers to easily associate an owner filehandle with the queue. * @ops: driver-specific callbacks * @mem_ops: memory allocator specific callbacks * @buf_ops: callbacks to deliver buffer information * between user-space and kernel-space. * @drv_priv: driver private data. * @subsystem_flags: Flags specific to the subsystem (V4L2/DVB/etc.). Not used * by the vb2 core. * @buf_struct_size: size of the driver-specific buffer structure; * "0" indicates the driver doesn't want to use a custom buffer * structure type. In that case a subsystem-specific struct * will be used (in the case of V4L2 that is * ``sizeof(struct vb2_v4l2_buffer)``). The first field of the * driver-specific buffer structure must be the subsystem-specific * struct (vb2_v4l2_buffer in the case of V4L2). * @timestamp_flags: Timestamp flags; ``V4L2_BUF_FLAG_TIMESTAMP_*`` and * ``V4L2_BUF_FLAG_TSTAMP_SRC_*`` * @gfp_flags: additional gfp flags used when allocating the buffers. * Typically this is 0, but it may be e.g. %GFP_DMA or %__GFP_DMA32 * to force the buffer allocation to a specific memory zone. * @min_queued_buffers: the minimum number of queued buffers needed before * @start_streaming can be called.
Used when a DMA engine * cannot be started unless at least this number of buffers * have been queued into the driver. * VIDIOC_REQBUFS will ensure at least @min_queued_buffers + 1 * buffers will be allocated. Note that VIDIOC_CREATE_BUFS will not * modify the requested buffer count. * @min_reqbufs_allocation: the minimum number of buffers to be allocated when * calling VIDIOC_REQBUFS. Note that VIDIOC_CREATE_BUFS will *not* * modify the requested buffer count and does not use this field. * Drivers can set this if there has to be a certain number of * buffers available for the hardware to work effectively. * This allows calling VIDIOC_REQBUFS with a buffer count of 1 and * it will be automatically adjusted to a workable buffer count. * If set, then @min_reqbufs_allocation must be larger than * @min_queued_buffers + 1. * If this field is > 3, then it is highly recommended that the * driver implements the V4L2_CID_MIN_BUFFERS_FOR_CAPTURE/OUTPUT * control. * @alloc_devs: &struct device memory type/allocator-specific per-plane device */ /* * Private elements (won't appear in the uAPI book): * @mmap_lock: private mutex used when buffers are allocated/freed/mmapped * @memory: current memory type used * @dma_dir: DMA mapping direction. * @bufs: videobuf2 buffer structures. If it is non-NULL then * bufs_bitmap is also non-NULL. * @bufs_bitmap: bitmap tracking whether each bufs[] entry is used * @max_num_buffers: upper limit of number of allocated/used buffers. * If set to 0 the v4l2 core will change it to VB2_MAX_FRAME * for backward compatibility. * @queued_list: list of buffers currently queued from userspace * @queued_count: number of buffers queued and ready for streaming. * @owned_by_drv_count: number of buffers owned by the driver * @done_list: list of buffers ready to be dequeued to userspace * @done_lock: lock to protect done_list list * @done_wq: waitqueue for processes waiting for buffers ready to be dequeued * @streaming: current streaming state * @start_streaming_called: @start_streaming was called successfully and we * started streaming. * @error: a fatal error occurred on the queue * @waiting_for_buffers: used in poll() to check if vb2 is still waiting for * buffers. Only set for capture queues if qbuf has not yet been * called since poll() needs to return %EPOLLERR in that situation. * @waiting_in_dqbuf: set by the core for the duration of a blocking DQBUF, when * it has to wait for a buffer to become available with vb2_queue->lock * released. Used to prevent destroying the queue by other threads. * @is_multiplanar: set if buffer type is multiplanar * @is_output: set if buffer type is output * @is_busy: set if at least one buffer has been allocated at some time. * @copy_timestamp: set if vb2-core should set timestamps * @last_buffer_dequeued: used in poll() and DQBUF to immediately return if the * last decoded buffer was already dequeued. Set for capture queues * when a buffer with the %V4L2_BUF_FLAG_LAST is dequeued. * @fileio: file io emulator internal data, used only if emulator is active * @threadio: thread io internal data, used only if thread is active * @name: queue name, used for logging purpose. Initialized automatically * if left empty by drivers.
*/ struct vb2_queue { unsigned int type; unsigned int io_modes; struct device *dev; unsigned long dma_attrs; unsigned int bidirectional:1; unsigned int fileio_read_once:1; unsigned int fileio_write_immediately:1; unsigned int allow_zero_bytesused:1; unsigned int quirk_poll_must_check_waiting_for_buffers:1; unsigned int supports_requests:1; unsigned int requires_requests:1; unsigned int uses_qbuf:1; unsigned int uses_requests:1; unsigned int allow_cache_hints:1; unsigned int non_coherent_mem:1; struct mutex *lock; void *owner; const struct vb2_ops *ops; const struct vb2_mem_ops *mem_ops; const struct vb2_buf_ops *buf_ops; void *drv_priv; u32 subsystem_flags; unsigned int buf_struct_size; u32 timestamp_flags; gfp_t gfp_flags; u32 min_queued_buffers; u32 min_reqbufs_allocation; struct device *alloc_devs[VB2_MAX_PLANES]; /* private: internal use only */ struct mutex mmap_lock; unsigned int memory; enum dma_data_direction dma_dir; struct vb2_buffer **bufs; unsigned long *bufs_bitmap; unsigned int max_num_buffers; struct list_head queued_list; unsigned int queued_count; atomic_t owned_by_drv_count; struct list_head done_list; spinlock_t done_lock; wait_queue_head_t done_wq; unsigned int streaming:1; unsigned int start_streaming_called:1; unsigned int error:1; unsigned int waiting_for_buffers:1; unsigned int waiting_in_dqbuf:1; unsigned int is_multiplanar:1; unsigned int is_output:1; unsigned int is_busy:1; unsigned int copy_timestamp:1; unsigned int last_buffer_dequeued:1; struct vb2_fileio_data *fileio; struct vb2_threadio_data *threadio; char name[32]; #ifdef CONFIG_VIDEO_ADV_DEBUG /* * Counters for how often these queue-related ops are * called. Used to check for unbalanced ops. */ u32 cnt_queue_setup; u32 cnt_prepare_streaming; u32 cnt_start_streaming; u32 cnt_stop_streaming; u32 cnt_unprepare_streaming; #endif }; /** * vb2_queue_allows_cache_hints() - Return true if the queue allows cache * and memory consistency hints. * * @q: pointer to &struct vb2_queue with videobuf2 queue */ static inline bool vb2_queue_allows_cache_hints(struct vb2_queue *q) { return q->allow_cache_hints && q->memory == VB2_MEMORY_MMAP; } /** * vb2_plane_vaddr() - Return a kernel virtual address of a given plane. * @vb: pointer to &struct vb2_buffer to which the plane in * question belongs. * @plane_no: plane number for which the address is to be returned. * * This function returns a kernel virtual address of a given plane if * such a mapping exists, NULL otherwise. */ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no); /** * vb2_plane_cookie() - Return allocator specific cookie for the given plane. * @vb: pointer to &struct vb2_buffer to which the plane in * question belongs. * @plane_no: plane number for which the cookie is to be returned. * * This function returns an allocator specific cookie for a given plane if * available, NULL otherwise. The allocator should provide some simple static * inline function, which would convert this cookie to the allocator specific * type that can be used directly by the driver to access the buffer. This can * be for example physical address, pointer to scatter list or IOMMU mapping. */ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no); /** * vb2_buffer_done() - inform videobuf2 that an operation on a buffer * is finished. * @vb: pointer to &struct vb2_buffer to be used. * @state: state of the buffer, as defined by &enum vb2_buffer_state.
* Either %VB2_BUF_STATE_DONE if the operation finished * successfully, %VB2_BUF_STATE_ERROR if the operation finished * with an error or %VB2_BUF_STATE_QUEUED. * * This function should be called by the driver after a hardware operation on * a buffer is finished and the buffer may be returned to userspace. The driver * cannot use this buffer anymore until it is queued back to it by videobuf2 * by means of the &vb2_ops->buf_queue callback. Only buffers previously queued * to the driver by &vb2_ops->buf_queue can be passed to this function. * * While streaming, a buffer can only be returned in state DONE or ERROR. * The &vb2_ops->start_streaming op can also return them in case the DMA engine * cannot be started for some reason. In that case the buffers should be * returned with state QUEUED to put them back into the queue. */ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state); /** * vb2_discard_done() - discard all buffers marked as DONE. * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This function is intended to be used with suspend/resume operations. It * discards all 'done' buffers as they would be too old to be requested after * resume. * * Drivers must stop the hardware and synchronize with interrupt handlers and/or * delayed works before calling this function to make sure no buffer will be * touched by the driver and/or hardware. */ void vb2_discard_done(struct vb2_queue *q); /** * vb2_wait_for_all_buffers() - wait until all buffers are given back to vb2. * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This function will wait until all buffers that have been given to the driver * by &vb2_ops->buf_queue are given back to vb2 with vb2_buffer_done(). * It is intended to be called with all locks taken, for example from * &vb2_ops->stop_streaming callback. */ int vb2_wait_for_all_buffers(struct vb2_queue *q); /** * vb2_core_querybuf() - query video buffer information. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: pointer to struct &vb2_buffer. * @pb: buffer struct passed from userspace. * * Videobuf2 core helper to implement VIDIOC_QUERYBUF() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * The passed buffer should have been verified. * * This function fills the relevant information for the userspace. * * Return: returns zero on success; an error code otherwise. */ void vb2_core_querybuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb); /** * vb2_core_reqbufs() - Initiate streaming. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. * @flags: auxiliary queue/buffer management flags. Currently, the only * used flag is %V4L2_MEMORY_FLAG_NON_COHERENT. * @count: requested buffer count. * * Videobuf2 core helper to implement VIDIOC_REQBUFS() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * This function: * * #) verifies streaming parameters passed from the userspace; * #) sets up the queue; * #) negotiates number of buffers and planes per buffer with the driver * to be used during streaming; * #) allocates internal buffer structures (&struct vb2_buffer), according to * the agreed parameters; * #) for MMAP memory type, allocates actual video memory, using the * memory handling/allocation routines provided during queue initialization. * * If @count is 0, all the memory will be freed instead.
* * If the queue has been allocated by a previous vb2_core_reqbufs() * call and the queue is not busy, memory will be reallocated. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count); /** * vb2_core_create_bufs() - Allocate buffers and any required auxiliary structs * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. * @flags: auxiliary queue/buffer management flags. * @count: requested buffer count. * @requested_planes: number of planes requested. * @requested_sizes: array with the size of the planes. * @first_index: index of the first created buffer, all allocated buffers have * indices in the range [first_index..first_index+count-1] * * Videobuf2 core helper to implement VIDIOC_CREATE_BUFS() operation. It is * called internally by VB2 by an API-specific handler, like * ``videobuf2-v4l2.h``. * * This function: * * #) verifies parameter sanity; * #) calls the &vb2_ops->queue_setup queue operation; * #) performs any necessary memory allocations. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int flags, unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[], unsigned int *first_index); /** * vb2_core_prepare_buf() - Pass ownership of a buffer from userspace * to the kernel. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: pointer to struct &vb2_buffer. * @pb: buffer structure passed from userspace to * &v4l2_ioctl_ops->vidioc_prepare_buf handler in driver. * * Videobuf2 core helper to implement VIDIOC_PREPARE_BUF() operation. It is * called internally by VB2 by an API-specific handler, like * ``videobuf2-v4l2.h``. * * The passed buffer should have been verified. * * This function calls vb2_ops->buf_prepare callback in the driver * (if provided), in which driver-specific buffer initialization can * be performed. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_prepare_buf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb); /** * vb2_core_remove_bufs() - remove a range of buffers from the queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * @start: first index of the range of buffers to remove. * @count: number of buffers to remove. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_remove_bufs(struct vb2_queue *q, unsigned int start, unsigned int count); /** * vb2_core_qbuf() - Queue a buffer from userspace * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: pointer to struct &vb2_buffer. * @pb: buffer structure passed from userspace to * v4l2_ioctl_ops->vidioc_qbuf handler in driver * @req: pointer to &struct media_request, may be NULL. * * Videobuf2 core helper to implement VIDIOC_QBUF() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * This function: * * #) If @req is non-NULL, then the buffer will be bound to this * media request and the function returns. The buffer will be prepared and * queued to the driver (i.e. the next two steps) when the request * itself is queued. * #) if necessary, calls &vb2_ops->buf_prepare callback in the driver * (if provided), in which driver-specific buffer initialization can * be performed; * #) if streaming is on, queues the buffer in the driver by means of the * &vb2_ops->buf_queue callback for processing.
* * Return: returns zero on success; an error code otherwise. */ int vb2_core_qbuf(struct vb2_queue *q, struct vb2_buffer *vb, void *pb, struct media_request *req); /** * vb2_core_dqbuf() - Dequeue a buffer to userspace * @q: pointer to &struct vb2_queue with videobuf2 queue * @pindex: pointer to the buffer index. May be NULL * @pb: buffer structure passed from userspace to * v4l2_ioctl_ops->vidioc_dqbuf handler in driver. * @nonblocking: if true, this call will not sleep waiting for a buffer if no * buffers ready for dequeuing are present. Normally the driver * would be passing (file->f_flags & O_NONBLOCK) here. * * Videobuf2 core helper to implement VIDIOC_DQBUF() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * This function: * * #) calls buf_finish callback in the driver (if provided), in which * the driver can perform any additional operations that may be required before * returning the buffer to userspace, such as cache sync, * #) the buffer struct members are filled with relevant information for * the userspace. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb, bool nonblocking); /** * vb2_core_streamon() - Implements VB2 stream ON logic * * @q: pointer to &struct vb2_queue with videobuf2 queue * @type: type of the queue to be started. * For V4L2, this is defined by &enum v4l2_buf_type type. * * Videobuf2 core helper to implement VIDIOC_STREAMON() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_streamon(struct vb2_queue *q, unsigned int type); /** * vb2_core_streamoff() - Implements VB2 stream OFF logic * * @q: pointer to &struct vb2_queue with videobuf2 queue * @type: type of the queue to be stopped. * For V4L2, this is defined by &enum v4l2_buf_type type. * * Videobuf2 core helper to implement VIDIOC_STREAMOFF() operation. It is * called internally by VB2 by an API-specific handler, like * ``videobuf2-v4l2.h``. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_streamoff(struct vb2_queue *q, unsigned int type); /** * vb2_core_expbuf() - Export a buffer as a file descriptor. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @fd: pointer to the file descriptor associated with DMABUF * (set by driver). * @type: buffer type. * @vb: pointer to struct &vb2_buffer. * @plane: index of the plane to be exported, 0 for single plane queues * @flags: file flags for newly created file, as defined at * include/uapi/asm-generic/fcntl.h. * Currently, only %O_CLOEXEC is supported; refer to the manual * of the open syscall for more details. * * Videobuf2 core helper to implement VIDIOC_EXPBUF() operation. It is called * internally by VB2 by an API-specific handler, like ``videobuf2-v4l2.h``. * * Return: returns zero on success; an error code otherwise. */ int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type, struct vb2_buffer *vb, unsigned int plane, unsigned int flags); /** * vb2_core_queue_init() - initialize a videobuf2 queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * This structure should be allocated in the driver * * The &vb2_queue structure should be allocated by the driver. The driver is * responsible for clearing its content and setting initial values for some * required entries before calling this function. * * .. note::
* * The following fields at @q should be set before calling this function: * &vb2_queue->ops, &vb2_queue->mem_ops, &vb2_queue->type. */ int vb2_core_queue_init(struct vb2_queue *q); /** * vb2_core_queue_release() - stop streaming, release the queue and free memory * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This function stops streaming and performs necessary cleanups, including * freeing video buffer memory. The driver is responsible for freeing * the &struct vb2_queue itself. */ void vb2_core_queue_release(struct vb2_queue *q); /** * vb2_queue_error() - signal a fatal error on the queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * * Flag that a fatal unrecoverable error has occurred and wake up all processes * waiting on the queue. Polling will now set %EPOLLERR and queuing and dequeuing * buffers will return %-EIO. * * The error flag will be cleared when canceling the queue, either from * vb2_streamoff() or vb2_queue_release(). Drivers should thus not call this * function before starting the stream, otherwise the error flag will remain set * until the queue is released when closing the device node. */ void vb2_queue_error(struct vb2_queue *q); /** * vb2_mmap() - map video buffers into application address space. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vma: pointer to &struct vm_area_struct with the vma passed * to the mmap file operation handler in the driver. * * Should be called from mmap file operation handler of a driver. * This function maps one plane of one of the available video buffers to * userspace. To map the whole video memory allocated on reqbufs, this function * has to be called once per plane per buffer previously allocated. * * When the userspace application calls mmap(), it passes an offset returned * to it earlier by the &v4l2_ioctl_ops->vidioc_querybuf handler. * That offset acts as a "cookie", which is then used to identify the plane * to be mapped. * * This function finds a plane with a matching offset and a mapping is performed * by means of the provided memory operation. * * The return values from this function are intended to be directly returned * from the mmap handler in the driver. */ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma); #ifndef CONFIG_MMU /** * vb2_get_unmapped_area - map video buffers into application address space. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @addr: memory address. * @len: buffer size. * @pgoff: page offset. * @flags: memory flags. * * This function is used on noMMU platforms to propose address mapping * for a given buffer. It's intended to be used as a handler for the * &file_operations->get_unmapped_area operation. * * The mmap() syscall routines will call this to get a proposed address * for the mapping, when ``!CONFIG_MMU``. */ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif /** * vb2_core_poll() - implements poll() syscall logic. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @file: &struct file argument passed to the poll * file operation handler. * @wait: &poll_table wait argument passed to the poll * file operation handler. * * This function implements poll file operation handler for a driver. * For CAPTURE queues, if a buffer is ready to be dequeued, the userspace will * be informed that the file descriptor of a video device is available for * reading.
* For OUTPUT queues, if a buffer is ready to be dequeued, the file descriptor * will be reported as available for writing. * * The return values from this function are intended to be directly returned * from poll handler in driver. */ __poll_t vb2_core_poll(struct vb2_queue *q, struct file *file, poll_table *wait); /** * vb2_read() - implements read() syscall logic. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @data: pointer to target userspace buffer * @count: number of bytes to read * @ppos: file handle position tracking pointer * @nonblock: mode selector (1 means nonblocking, 0 means blocking) */ size_t vb2_read(struct vb2_queue *q, char __user *data, size_t count, loff_t *ppos, int nonblock); /** * vb2_write() - implements write() syscall logic. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @data: pointer to target userspace buffer * @count: number of bytes to write * @ppos: file handle position tracking pointer * @nonblock: mode selector (1 means nonblocking, 0 means blocking) */ size_t vb2_write(struct vb2_queue *q, const char __user *data, size_t count, loff_t *ppos, int nonblock); /** * typedef vb2_thread_fnc - callback function for use with vb2_thread. * * @vb: pointer to struct &vb2_buffer. * @priv: pointer to private data. * * This is called whenever a buffer is dequeued in the thread. */ typedef int (*vb2_thread_fnc)(struct vb2_buffer *vb, void *priv); /** * vb2_thread_start() - start a thread for the given queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @fnc: &vb2_thread_fnc callback function. * @priv: priv pointer passed to the callback function. * @thread_name: the name of the thread. This will be prefixed with "vb2-". * * This starts a thread that will queue and dequeue until an error occurs * or vb2_thread_stop() is called. * * .. attention:: * * This function should not be used for anything else but the videobuf2-dvb * support. If you think you have another good use-case for this, then please * contact the linux-media mailing list first. */ int vb2_thread_start(struct vb2_queue *q, vb2_thread_fnc fnc, void *priv, const char *thread_name); /** * vb2_thread_stop() - stop the thread for the given queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. */ int vb2_thread_stop(struct vb2_queue *q); /** * vb2_is_streaming() - return streaming status of the queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. */ static inline bool vb2_is_streaming(struct vb2_queue *q) { return q->streaming; } /** * vb2_fileio_is_active() - return true if fileio is active. * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This returns true if read() or write() is used to stream the data * as opposed to streaming I/O. This is almost never an important distinction, * except in rare cases. One such case is that using read() or write() to * stream a format using %V4L2_FIELD_ALTERNATE is not allowed since there * is no way you can pass the field information of each buffer to/from * userspace. A driver that supports this field format should check for * this in the &vb2_ops->queue_setup op and reject it if this function returns * true. */ static inline bool vb2_fileio_is_active(struct vb2_queue *q) { return q->fileio; } /** * vb2_get_num_buffers() - get the number of buffers in a queue * @q: pointer to &struct vb2_queue with videobuf2 queue.
*/ static inline unsigned int vb2_get_num_buffers(struct vb2_queue *q) { if (q->bufs_bitmap) return bitmap_weight(q->bufs_bitmap, q->max_num_buffers); return 0; } /** * vb2_is_busy() - return busy status of the queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. * * This function checks if the queue has any buffers allocated. */ static inline bool vb2_is_busy(struct vb2_queue *q) { return !!q->is_busy; } /** * vb2_get_drv_priv() - return driver private data associated with the queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. */ static inline void *vb2_get_drv_priv(struct vb2_queue *q) { return q->drv_priv; } /** * vb2_set_plane_payload() - set bytesused for the plane @plane_no. * @vb: pointer to &struct vb2_buffer to which the plane in * question belongs to. * @plane_no: plane number for which payload should be set. * @size: payload in bytes. */ static inline void vb2_set_plane_payload(struct vb2_buffer *vb, unsigned int plane_no, unsigned long size) { /* * size must never be larger than the buffer length, so * warn and clamp to the buffer length if that's the case. */ if (plane_no < vb->num_planes) { if (WARN_ON_ONCE(size > vb->planes[plane_no].length)) size = vb->planes[plane_no].length; vb->planes[plane_no].bytesused = size; } } /** * vb2_get_plane_payload() - get bytesused for the plane plane_no * @vb: pointer to &struct vb2_buffer to which the plane in * question belongs to. * @plane_no: plane number for which the payload should be returned. */ static inline unsigned long vb2_get_plane_payload(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no < vb->num_planes) return vb->planes[plane_no].bytesused; return 0; } /** * vb2_plane_size() - return plane size in bytes. * @vb: pointer to &struct vb2_buffer to which the plane in * question belongs to. * @plane_no: plane number for which size should be returned. */ static inline unsigned long vb2_plane_size(struct vb2_buffer *vb, unsigned int plane_no) { if (plane_no < vb->num_planes) return vb->planes[plane_no].length; return 0; } /** * vb2_start_streaming_called() - return streaming status of driver. * @q: pointer to &struct vb2_queue with videobuf2 queue. */ static inline bool vb2_start_streaming_called(struct vb2_queue *q) { return q->start_streaming_called; } /** * vb2_clear_last_buffer_dequeued() - clear last buffer dequeued flag of queue. * @q: pointer to &struct vb2_queue with videobuf2 queue. */ static inline void vb2_clear_last_buffer_dequeued(struct vb2_queue *q) { q->last_buffer_dequeued = false; } /** * vb2_get_buffer() - get a buffer from a queue * @q: pointer to &struct vb2_queue with videobuf2 queue. * @index: buffer index * * This function obtains a buffer from a queue, by its index. * Keep in mind that there is no refcounting involved in this * operation, so the buffer lifetime should be taken into * consideration. */ static inline struct vb2_buffer *vb2_get_buffer(struct vb2_queue *q, unsigned int index) { if (!q->bufs) return NULL; if (index >= q->max_num_buffers) return NULL; if (test_bit(index, q->bufs_bitmap)) return q->bufs[index]; return NULL; } /* * The following functions are not part of the vb2 core API, but are useful * functions for videobuf2-*. */ /** * vb2_buffer_in_use() - return true if the buffer is in use and * the queue cannot be freed (by the means of VIDIOC_REQBUFS(0)) call. * * @vb: buffer to check. * @q: pointer to &struct vb2_queue with videobuf2 queue.
/* * The following functions are not part of the vb2 core API, but are useful * functions for videobuf2-*. */ /** * vb2_buffer_in_use() - return true if the buffer is in use and * the queue cannot be freed (by means of a VIDIOC_REQBUFS(0) call). * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @vb: buffer whose in-use status should be checked. */ bool vb2_buffer_in_use(struct vb2_queue *q, struct vb2_buffer *vb); /** * vb2_verify_memory_type() - Check whether the memory type and buffer type * passed to a buffer operation are compatible with the queue. * * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory model, as defined by enum &vb2_memory. * @type: private buffer type whose content is defined by the vb2-core * caller. For example, for V4L2, it should match * the types defined on enum &v4l2_buf_type. */ int vb2_verify_memory_type(struct vb2_queue *q, enum vb2_memory memory, unsigned int type); /** * vb2_request_object_is_buffer() - return true if the object is a buffer * * @obj: the request object. */ bool vb2_request_object_is_buffer(struct media_request_object *obj); /** * vb2_request_buffer_cnt() - return the number of buffers in the request * * @req: the request. */ unsigned int vb2_request_buffer_cnt(struct media_request *req); #endif /* _MEDIA_VIDEOBUF2_CORE_H */
// SPDX-License-Identifier: GPL-2.0-or-later /* * Twofish for CryptoAPI * * Originally Twofish for GPG * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 * 256-bit key length added March 20, 1999 * Some modifications to reduce the text size by Werner Koch, April, 1998 * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com> * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net> * * The original author has disclaimed all copyright interest in this * code and thus put it in the public domain. The subsequent authors * have put this under the GNU General Public License. * * This code is a "clean room" implementation, written from the paper * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available * through http://www.counterpane.com/twofish.html * * For background information on multiplication in finite fields, used for * the matrix operations in the key schedule, see the book _Contemporary * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the * Third Edition. */ #include <linux/unaligned.h> #include <crypto/algapi.h> #include <crypto/twofish.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/bitops.h> /* Macros to compute the g() function in the encryption and decryption * rounds. G1 is the straight g() function; G2 includes the 8-bit * rotation for the high 32-bit word. */ #define G1(a) \ (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \ ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]) #define G2(b) \ (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \ ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]) /* Encryption and decryption Feistel rounds. Each one calls the two g() * macros, does the PHT, and performs the XOR and the appropriate bit * rotations. The parameters are the round number (used to select subkeys), * and the four 32-bit chunks of the text. */ #define ENCROUND(n, a, b, c, d) \ x = G1 (a); y = G2 (b); \ x += y; y += x + ctx->k[2 * (n) + 1]; \ (c) ^= x + ctx->k[2 * (n)]; \ (c) = ror32((c), 1); \ (d) = rol32((d), 1) ^ y #define DECROUND(n, a, b, c, d) \ x = G1 (a); y = G2 (b); \ x += y; y += x; \ (d) ^= y + ctx->k[2 * (n) + 1]; \ (d) = ror32((d), 1); \ (c) = rol32((c), 1); \ (c) ^= (x + ctx->k[2 * (n)]) /* Encryption and decryption cycles; each one is simply two Feistel rounds * with the 32-bit chunks re-ordered to simulate the "swap" */ #define ENCCYCLE(n) \ ENCROUND (2 * (n), a, b, c, d); \ ENCROUND (2 * (n) + 1, c, d, a, b) #define DECCYCLE(n) \ DECROUND (2 * (n) + 1, c, d, a, b); \ DECROUND (2 * (n), a, b, c, d) /* Macros to convert the input and output bytes into 32-bit words, * and simultaneously perform the whitening step.
INPACK packs word * number n into the variable named by x, using whitening subkey number m. * OUTUNPACK unpacks word number n from the variable named by x, using * whitening subkey number m. */ #define INPACK(n, x, m) \ x = get_unaligned_le32(in + (n) * 4) ^ ctx->w[m] #define OUTUNPACK(n, x, m) \ x ^= ctx->w[m]; \ put_unaligned_le32(x, out + (n) * 4) /* Encrypt one block. in and out may be the same. */ static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); /* The four 32-bit chunks of the text. */ u32 a, b, c, d; /* Temporaries used by the round function. */ u32 x, y; /* Input whitening and packing. */ INPACK (0, a, 0); INPACK (1, b, 1); INPACK (2, c, 2); INPACK (3, d, 3); /* Encryption Feistel cycles. */ ENCCYCLE (0); ENCCYCLE (1); ENCCYCLE (2); ENCCYCLE (3); ENCCYCLE (4); ENCCYCLE (5); ENCCYCLE (6); ENCCYCLE (7); /* Output whitening and unpacking. */ OUTUNPACK (0, c, 4); OUTUNPACK (1, d, 5); OUTUNPACK (2, a, 6); OUTUNPACK (3, b, 7); } /* Decrypt one block. in and out may be the same. */ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); /* The four 32-bit chunks of the text. */ u32 a, b, c, d; /* Temporaries used by the round function. */ u32 x, y; /* Input whitening and packing. */ INPACK (0, c, 4); INPACK (1, d, 5); INPACK (2, a, 6); INPACK (3, b, 7); /* Decryption Feistel cycles. */ DECCYCLE (7); DECCYCLE (6); DECCYCLE (5); DECCYCLE (4); DECCYCLE (3); DECCYCLE (2); DECCYCLE (1); DECCYCLE (0); /* Output whitening and unpacking. */ OUTUNPACK (0, a, 0); OUTUNPACK (1, b, 1); OUTUNPACK (2, c, 2); OUTUNPACK (3, d, 3); } static struct crypto_alg alg = { .cra_name = "twofish", .cra_driver_name = "twofish-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = TF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct twofish_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = TF_MIN_KEY_SIZE, .cia_max_keysize = TF_MAX_KEY_SIZE, .cia_setkey = twofish_setkey, .cia_encrypt = twofish_encrypt, .cia_decrypt = twofish_decrypt } } }; static int __init twofish_mod_init(void) { return crypto_register_alg(&alg); } static void __exit twofish_mod_fini(void) { crypto_unregister_alg(&alg); } module_init(twofish_mod_init); module_exit(twofish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm"); MODULE_ALIAS_CRYPTO("twofish"); MODULE_ALIAS_CRYPTO("twofish-generic");
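/*
 * Illustrative sketch (not part of this file): encrypting a single block
 * with the cipher registered above through the kernel crypto API. Note that
 * in recent kernels the crypto_cipher_*() helpers live in
 * crypto/internal/cipher.h and are reserved for internal users; this sketch
 * only shows the calling convention, with a hypothetical all-zero test key.
 */
#if 0
static int twofish_demo(void)
{
	static const u8 key[TF_MIN_KEY_SIZE]; /* all-zero 128-bit test key */
	u8 in[TF_BLOCK_SIZE] = {}, out[TF_BLOCK_SIZE];
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("twofish", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);

	crypto_free_cipher(tfm);
	return err;
}
#endif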
/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __KVM_HOST_H #define __KVM_HOST_H #include <linux/entry-virt.h> #include <linux/types.h> #include <linux/hardirq.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/stat.h> #include <linux/bug.h> #include <linux/minmax.h> #include <linux/mm.h> #include <linux/mmu_notifier.h> #include <linux/preempt.h> #include <linux/msi.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/rcupdate.h> #include <linux/ratelimit.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/context_tracking.h> #include <linux/irqbypass.h> #include <linux/rcuwait.h> #include <linux/refcount.h> #include <linux/nospec.h> #include <linux/notifier.h> #include <linux/ftrace.h> #include <linux/hashtable.h> #include <linux/instrumentation.h> #include <linux/interval_tree.h> #include <linux/rbtree.h> #include <linux/xarray.h> #include <asm/signal.h> #include <linux/kvm.h> #include <linux/kvm_para.h> #include <linux/kvm_types.h> #include <asm/kvm_host.h> #include <linux/kvm_dirty_ring.h> #ifndef KVM_MAX_VCPU_IDS #define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS #endif /* * Bits 16 ~ 31 of kvm_userspace_memory_region::flags are used internally * by KVM; the other bits are visible to userspace and are defined in * include/uapi/linux/kvm.h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) #define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", * e.g. is temporarily set for the duration of kvm_swap_active_memslots(). * This flag effectively creates a unique generation number that is used to * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, * i.e. may (or may not) have come from the previous memslots generation. * * This is necessary because the actual memslots update is not atomic with * respect to the generation number update.
Updating the generation number * first would allow a vCPU to cache a spte from the old memslots using the * new generation number, and updating the generation number after switching * to the new memslots would allow cache hits using the old generation number * to reference the defunct memslots. * * This mechanism is used to prevent getting hits in KVM's caches while a * memslot update is in-progress, and to prevent cache hits *after* updating * the actual generation number against accesses that were inserted into the * cache *before* the memslots were updated. */ #define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 #ifndef KVM_MAX_NR_ADDRESS_SPACES #define KVM_MAX_NR_ADDRESS_SPACES 1 #endif /* * For the normal pfn, the highest 12 bits should be zero, * so we can mask bit 62 ~ bit 52 to indicate the error pfn, * mask bit 63 to indicate the noslot pfn. */ #define KVM_PFN_ERR_MASK (0x7ffULL << 52) #define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52) #define KVM_PFN_NOSLOT (0x1ULL << 63) #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) #define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) /* * error pfns indicate that the gfn is in a slot but failed to * translate to a pfn on the host. */ static inline bool is_error_pfn(kvm_pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_MASK); } /* * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted * by a pending signal. Note, the signal may or may not be fatal. */ static inline bool is_sigpending_pfn(kvm_pfn_t pfn) { return pfn == KVM_PFN_ERR_SIGPENDING; } /* * error_noslot pfns indicate that the gfn cannot be translated * to a pfn - it is either not in a slot or the translation failed. */ static inline bool is_error_noslot_pfn(kvm_pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK); } /* noslot pfn indicates that the gfn is not in a slot. */ static inline bool is_noslot_pfn(kvm_pfn_t pfn) { return pfn == KVM_PFN_NOSLOT; } /* * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390) * provide their own defines and kvm_is_error_hva() */ #ifndef KVM_HVA_ERR_BAD #define KVM_HVA_ERR_BAD (PAGE_OFFSET) #define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE) static inline bool kvm_is_error_hva(unsigned long addr) { return addr >= PAGE_OFFSET; } #endif static inline bool kvm_is_error_gpa(gpa_t gpa) { return gpa == INVALID_GPA; } #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) #define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 #define KVM_REQUEST_ARCH_BASE 8 /* * KVM_REQ_OUTSIDE_GUEST_MODE exists purely as a way to force the vCPU to * OUTSIDE_GUEST_MODE. KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick" * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing * on. A kick only guarantees that the vCPU is on its way out, e.g. a previous * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no * guarantee the vCPU received an IPI and has actually exited guest mode. */ #define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
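/*
 * Illustrative sketch: architecture code builds its own request bits on top
 * of KVM_REQUEST_ARCH_BASE with the macros above. The two request names and
 * my_flush_all() are hypothetical, not taken from any real architecture;
 * kvm_make_all_cpus_request() is declared just below.
 */
#if 0
#define KVM_REQ_MY_TLB_FLUSH	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT)
#define KVM_REQ_MY_EVENT	KVM_ARCH_REQ(1)

/* Ask every vCPU to service the hypothetical flush request. */
static void my_flush_all(struct kvm *kvm)
{
	kvm_make_all_cpus_request(kvm, KVM_REQ_MY_TLB_FLUSH);
}
#endif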
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 #define KVM_PIT_IRQ_SOURCE_ID 2 extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { gpa_t addr; int len; struct kvm_io_device *dev; }; #define NR_IOBUS_DEVS 1000 struct kvm_io_bus { int dev_count; int ioeventfd_count; struct rcu_head rcu; struct kvm_io_range range[]; }; enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, KVM_IOCSR_BUS, KVM_NR_BUSES }; int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val, long cookie); int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { struct work_struct work; struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; bool wakeup_all; bool notpresent_injected; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { unsigned long attributes; }; enum kvm_gfn_range_filter { KVM_FILTER_SHARED = BIT(0), KVM_FILTER_PRIVATE = BIT(1), }; struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; enum kvm_gfn_range_filter attr_filter; bool may_block; bool lockless; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); #endif enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE, READING_SHADOW_PAGE_TABLES, }; struct kvm_host_map { /* * Only valid if the 'pfn' is managed by the host kernel (i.e. there is * a 'struct page' for it). When using the mem= kernel parameter some * memory can be used as guest memory even though it is not managed by * the host kernel. */ struct page *pinned_page; struct page *page; void *hva; kvm_pfn_t pfn; kvm_pfn_t gfn; bool writable; }; /* * Used to check if the mapping is valid or not. Never use 'kvm_host_map' * directly to check for that. */
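/*
 * Illustrative sketch: temporarily mapping a guest page via kvm_host_map.
 * kvm_vcpu_map(), kvm_vcpu_map_mark_dirty() and kvm_vcpu_unmap() are declared
 * further down in this header; my_patch_guest_page() and the byte write are
 * hypothetical.
 */
#if 0
static int my_patch_guest_page(struct kvm_vcpu *vcpu, gpa_t gpa, u8 val)
{
	struct kvm_host_map map;

	if (kvm_vcpu_map(vcpu, gpa, &map))
		return -EFAULT;

	/* map.hva points at the start of the mapped guest page. */
	*((u8 *)map.hva + offset_in_page(gpa)) = val;

	kvm_vcpu_map_mark_dirty(vcpu, &map);
	kvm_vcpu_unmap(vcpu, &map);
	return 0;
}
#endif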
static inline bool kvm_vcpu_mapped(struct kvm_host_map *map) { return !!map->hva; } static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop) { return single_task_running() && !need_resched() && ktime_before(cur, stop); } /* * Sometimes a large or cross-page mmio needs to be broken up into separate * exits for userspace servicing. */ struct kvm_mmio_fragment { gpa_t gpa; void *data; unsigned len; }; struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif int cpu; int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index into kvm->vcpu_array */ int ____srcu_idx; /* Don't use this directly. You've been warned. */ #ifdef CONFIG_PROVE_RCU int srcu_depth; #endif int mode; u64 requests; unsigned long guest_debug; struct mutex mutex; struct kvm_run *run; #ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; #endif struct pid *pid; rwlock_t pid_lock; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; bool valid_wakeup; #ifdef CONFIG_HAS_IOMEM int mmio_needed; int mmio_read_completed; int mmio_is_write; int mmio_cur_fragment; int mmio_nr_fragments; struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS]; #endif #ifdef CONFIG_KVM_ASYNC_PF struct { u32 queued; struct list_head queue; struct list_head done; spinlock_t lock; } async_pf; #endif #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT /* * Cpu relax intercept or pause loop exit optimization * in_spin_loop: set when a vcpu does a pause loop exit * or cpu relax intercepted. * dy_eligible: indicates whether vcpu is eligible for directed yield. */ struct { bool in_spin_loop; bool dy_eligible; } spin_loop; #endif bool wants_to_run; bool preempted; bool ready; bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; struct kvm_dirty_ring dirty_ring; /* * The most recently used memslot by this vCPU and the slots generation * for which it is valid. * No wraparound protection is needed since generations won't overflow in * thousands of years, even assuming 1M memslot operations per second. */ struct kvm_memory_slot *last_used_slot; u64 last_used_slot_gen; }; /* * Start accounting time towards a guest. * Must be called before entering guest context. */ static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so it's safe to assume that it's the * stime pending cputime to flush. */ instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); } /* * Enter guest context and enter an RCU extended quiescent state. * * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is * unsafe to use any code which may directly or indirectly use RCU, tracing * (including IRQ flag tracing), or lockdep. All code in this period must be * non-instrumentable. */ static __always_inline void guest_context_enter_irqoff(void) { /* * KVM does not hold any references to RCU protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspace from an RCU point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Let's treat guest mode as a quiescent state, just like * we do with user-mode execution. */ if (!context_tracking_guest_enter()) { instrumentation_begin(); rcu_virt_note_context_switch(); instrumentation_end(); } } /* * Deprecated. Architectures should move to guest_timing_enter_irqoff() and * guest_state_enter_irqoff().
*/ static __always_inline void guest_enter_irqoff(void) { guest_timing_enter_irqoff(); guest_context_enter_irqoff(); } /** * guest_state_enter_irqoff - Fixup state when entering a guest * * Entry to a guest will enable interrupts, but the kernel state is interrupts * disabled when this is invoked. Also tell RCU about it. * * 1) Trace interrupts on state * 2) Invoke context tracking if enabled to adjust RCU state * 3) Tell lockdep that interrupts are enabled * * Invoked from architecture specific code before entering a guest. * Must be called with interrupts disabled and the caller must be * non-instrumentable. * The caller has to invoke guest_timing_enter_irqoff() before this. * * Note: this is analogous to exit_to_user_mode(). */ static __always_inline void guest_state_enter_irqoff(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); guest_context_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); } /* * Exit guest context and exit an RCU extended quiescent state. * * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is * unsafe to use any code which may directly or indirectly use RCU, tracing * (including IRQ flag tracing), or lockdep. All code in this period must be * non-instrumentable. */ static __always_inline void guest_context_exit_irqoff(void) { /* * Guest mode is treated as a quiescent state, see * guest_context_enter_irqoff() for more details. */ if (!context_tracking_guest_exit()) { instrumentation_begin(); rcu_virt_note_context_switch(); instrumentation_end(); } } /* * Stop accounting time towards a guest. * Must be called after exiting guest context. */ static __always_inline void guest_timing_exit_irqoff(void) { instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } /* * Deprecated. Architectures should move to guest_state_exit_irqoff() and * guest_timing_exit_irqoff(). */ static __always_inline void guest_exit_irqoff(void) { guest_context_exit_irqoff(); guest_timing_exit_irqoff(); } static inline void guest_exit(void) { unsigned long flags; local_irq_save(flags); guest_exit_irqoff(); local_irq_restore(flags); } /** * guest_state_exit_irqoff - Establish state when returning from guest mode * * Entry from a guest disables interrupts, but guest mode is traced as * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. * * 1) Tell lockdep that interrupts are disabled * 2) Invoke context tracking if enabled to reactivate RCU * 3) Trace interrupts off state * * Invoked from architecture specific code after exiting a guest. * Must be invoked with interrupts disabled and the caller must be * non-instrumentable. * The caller has to invoke guest_timing_exit_irqoff() after this. * * Note: this is analogous to enter_from_user_mode(). */ static __always_inline void guest_state_exit_irqoff(void) { lockdep_hardirqs_off(CALLER_ADDR0); guest_context_exit_irqoff(); instrumentation_begin(); trace_hardirqs_off_finish(); instrumentation_end(); } static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* * The memory barrier ensures a previous write to vcpu->requests cannot * be reordered with the read of vcpu->mode. It pairs with the general * memory barrier following the write of vcpu->mode in VCPU RUN. */ smp_mb__before_atomic(); return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } /* * Some of the bitops functions do not support too long bitmaps. 
* This number must be chosen so as not to exceed such limits. */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) /* * Since at idle each memslot belongs to two memslot sets, it has to contain * two embedded nodes for each data structure that it forms a part of. * * Two memslot sets (one active and one inactive) are necessary so the VM * continues to run on one memslot set while the other is being modified. * * These two memslot sets normally point to the same set of memslots. * They can, however, be desynchronized when performing a memslot management * operation by replacing the memslot to be modified by its copy. * After the operation is complete, both memslot sets once again point to * the same, common set of memslot data. * * The memslots themselves are independent of each other so they can be * individually added or deleted. */ struct kvm_memory_slot { struct hlist_node id_node[2]; struct interval_tree_node hva_node[2]; struct rb_node gfn_node[2]; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; u32 flags; short id; u16 as_id; #ifdef CONFIG_KVM_GUEST_MEMFD struct { /* * Writes protected by kvm->slots_lock. Acquiring a * reference via kvm_gmem_get_file() is protected by * either kvm->slots_lock or kvm->srcu. */ struct file *file; pgoff_t pgoff; } gmem; #endif }; static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; } static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; } static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long len = kvm_dirty_bitmap_bytes(memslot); return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap); } #ifndef KVM_DIRTY_LOG_MANUAL_CAPS #define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE #endif struct kvm_s390_adapter_int { u64 ind_addr; u64 summary_addr; u64 ind_offset; u32 summary_offset; u32 adapter_id; }; struct kvm_hv_sint { u32 vcpu; u32 sint; }; struct kvm_xen_evtchn { u32 port; u32 vcpu_id; int vcpu_idx; u32 priority; }; struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status); union { struct { unsigned irqchip; unsigned pin; } irqchip; struct { u32 address_lo; u32 address_hi; u32 data; u32 flags; u32 devid; } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; struct kvm_xen_evtchn xen_evtchn; }; struct hlist_node link; }; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING struct kvm_irq_routing_table { int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; u32 nr_rt_entries; /* * Array indexed by gsi. Each entry contains a list of the irq chips * the gsi is connected to.
*/ struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #ifndef KVM_INTERNAL_MEM_SLOTS #define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #if KVM_MAX_NR_ADDRESS_SPACES == 1 static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm) { return KVM_MAX_NR_ADDRESS_SPACES; } static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) { return 0; } #endif #ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; } #endif #ifdef CONFIG_KVM_GUEST_MEMFD bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) { u64 flags = GUEST_MEMFD_FLAG_MMAP; if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) flags |= GUEST_MEMFD_FLAG_INIT_SHARED; return flags; } #endif #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); } #endif struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; struct rb_root_cached hva_tree; struct rb_root gfn_tree; /* * The mapping table from slot id to memslot. * * 7-bit bucket count matches the size of the old id to index array for * 512 slots, while giving good performance with this slot count. * Higher bucket counts bring only small performance improvements but * always result in higher memory usage (even for lower memslot counts). */ DECLARE_HASHTABLE(id_hash, 7); int node_idx; }; struct kvm { #ifdef KVM_HAVE_MMU_RWLOCK rwlock_t mmu_lock; #else spinlock_t mmu_lock; #endif /* KVM_HAVE_MMU_RWLOCK */ struct mutex slots_lock; /* * Protects the arch-specific fields of struct kvm_memory_slots in * use by the VM. To be used under the slots_lock (above) or in a * kvm->srcu critical section where acquiring the slots_lock would * lead to deadlock with the synchronize_srcu in * kvm_swap_active_memslots(). */ struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; /* The two memslot sets - active and inactive (per address space) */ struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an * incorrect answer is acceptable. */ atomic_t nr_memslots_dirty_logging; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; unsigned long mn_active_invalidate_count; struct rcuwait mn_memslots_update_rcuwait; /* For management / invalidation of gfn_to_pfn_caches */ spinlock_t gpc_lock; struct list_head gpc_list; /* * created_vcpus is protected by kvm->lock, and is incremented * at the beginning of KVM_CREATE_VCPU. online_vcpus is only * incremented after storing the kvm_vcpu pointer in vcpus, * and is accessed atomically. */ atomic_t online_vcpus; int max_vcpus; int created_vcpus; int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_IRQCHIP struct { spinlock_t lock; struct list_head items; /* resampler_list update side is protected by resampler_lock. 
*/ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; #endif struct list_head ioeventfds; struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; #ifdef CONFIG_KVM_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; struct list_head coalesced_zones; #endif struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head irq_ack_notifier_list; #endif #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; gfn_t mmu_invalidate_range_start; gfn_t mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool dirty_ring_with_bitmap; bool vm_bugged; bool vm_dead; #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES /* Protected by slots_lock (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; }; #define kvm_err(fmt, ...) \ pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_info(fmt, ...) \ pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_debug(fmt, ...) \ pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_debug_ratelimited(fmt, ...) \ pr_debug_ratelimited("kvm [%i]: " fmt, task_pid_nr(current), \ ## __VA_ARGS__) #define kvm_pr_unimpl(fmt, ...) \ pr_err_ratelimited("kvm [%i]: " fmt, \ task_tgid_nr(current), ## __VA_ARGS__) /* The guest did something we don't support. */ #define vcpu_unimpl(vcpu, fmt, ...) \ kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) #define vcpu_debug_ratelimited(vcpu, fmt, ...) \ kvm_debug_ratelimited("vcpu%i " fmt, (vcpu)->vcpu_id, \ ## __VA_ARGS__) #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) static inline void kvm_vm_dead(struct kvm *kvm) { kvm->vm_dead = true; kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD); } static inline void kvm_vm_bugged(struct kvm *kvm) { kvm->vm_bugged = true; kvm_vm_dead(kvm); } #define KVM_BUG(cond, kvm, fmt...) \ ({ \ bool __ret = !!(cond); \ \ if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) #define KVM_BUG_ON(cond, kvm) \ ({ \ bool __ret = !!(cond); \ \ if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) /* * Note, "data corruption" refers to corruption of host kernel data structures, * not guest data. Guest data corruption, suspected or confirmed, that is tied * and contained to a single VM should *never* BUG() and potentially panic the * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure * is corrupted and that corruption can have a cascading effect to other parts * of the host and/or to other VMs.
*/ #define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ ({ \ bool __ret = !!(cond); \ \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ BUG_ON(__ret); \ else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU WARN_ONCE(vcpu->srcu_depth++, "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1); #endif vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu) { srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx); #ifdef CONFIG_PROVE_RCU WARN_ONCE(--vcpu->srcu_depth, "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth); #endif } static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) { return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } /* * Get a bus reference under the update-side lock. No long-term SRCU reader * references are permitted, to avoid stale reads vs concurrent IO * registrations. */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { return rcu_dereference_protected(kvm->buses[idx], lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { int num_vcpus = atomic_read(&kvm->online_vcpus); /* * Explicitly verify the target vCPU is online, as the anti-speculation * logic only limits the CPU's ability to speculate, e.g. given a "bad" * index, clamping the index to 0 would return vCPU0, not NULL. */ if (i >= num_vcpus) return NULL; i = array_index_nospec(i, num_vcpus); /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return xa_load(&kvm->vcpu_array, i); } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ if (atomic_read(&kvm->online_vcpus)) \ xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { struct kvm_vcpu *vcpu = NULL; unsigned long i; if (id < 0) return NULL; if (id < KVM_MAX_VCPUS) vcpu = kvm_get_vcpu(kvm, id); if (vcpu && vcpu->vcpu_id == id) return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) if (vcpu->vcpu_id == id) return vcpu; return NULL; } void kvm_destroy_vcpus(struct kvm *kvm); int kvm_trylock_all_vcpus(struct kvm *kvm); int kvm_lock_all_vcpus(struct kvm *kvm); void kvm_unlock_all_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else static inline int kvm_irqfd_init(void) { return 0; } static inline void kvm_irqfd_exit(void) { } #endif int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); bool kvm_get_kvm_safe(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); bool file_is_kvm(struct file *file); void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { return __kvm_memslots(kvm, 
0); } static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) { int as_id = kvm_arch_vcpu_memslots_id(vcpu); return __kvm_memslots(vcpu->kvm, as_id); } static inline bool kvm_memslots_empty(struct kvm_memslots *slots) { return RB_EMPTY_ROOT(&slots->gfn_tree); } bool kvm_are_all_memslots_empty(struct kvm *kvm); #define kvm_for_each_memslot(memslot, bkt, slots) \ hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \ if (WARN_ON_ONCE(!memslot->npages)) { \ } else static inline struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { struct kvm_memory_slot *slot; int idx = slots->node_idx; hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) { if (slot->id == id) return slot; } return NULL; } /* Iterator used for walking memslots that overlap a gfn range. */ struct kvm_memslot_iter { struct kvm_memslots *slots; struct rb_node *node; struct kvm_memory_slot *slot; }; static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter) { iter->node = rb_next(iter->node); if (!iter->node) return; iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]); } static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter, struct kvm_memslots *slots, gfn_t start) { int idx = slots->node_idx; struct rb_node *tmp; struct kvm_memory_slot *slot; iter->slots = slots; /* * Find the so-called "upper bound" of a key - the first node that has * its key strictly greater than the searched one (the start gfn in our case). */ iter->node = NULL; for (tmp = slots->gfn_tree.rb_node; tmp; ) { slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]); if (start < slot->base_gfn) { iter->node = tmp; tmp = tmp->rb_left; } else { tmp = tmp->rb_right; } } /* * Find the slot with the lowest gfn that can possibly intersect with * the range, so we'll ideally have slot start <= range start */ if (iter->node) { /* * A NULL previous node means that the very first slot * already has a higher start gfn. * In this case slot start > range start. */ tmp = rb_prev(iter->node); if (tmp) iter->node = tmp; } else { /* a NULL node below means no slots */ iter->node = rb_last(&slots->gfn_tree); } if (iter->node) { iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]); /* * It is possible in the slot start < range start case that the * found slot ends before or at range start (slot end <= range start) * and so it does not overlap the requested range. * * In such a non-overlapping case the next slot (if it exists) will * already have slot start > range start, otherwise the logic above * would have found it instead of the current slot. */ if (iter->slot->base_gfn + iter->slot->npages <= start) kvm_memslot_iter_next(iter); } } static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end) { if (!iter->node) return false; /* * If this slot starts beyond or at the end of the range, so does * every following one. */ return iter->slot->base_gfn < end; } /* Iterate over each memslot at least partially intersecting [start, end) range */ #define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end) \ for (kvm_memslot_iter_start(iter, slots, start); \ kvm_memslot_iter_is_valid(iter, end); \ kvm_memslot_iter_next(iter))
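/*
 * Illustrative sketch: summing the number of pages that every memslot
 * contributes to a [start, end) gfn range using the iterator above.
 * my_count_range_pages() is a hypothetical helper, not part of KVM.
 */
#if 0
static unsigned long my_count_range_pages(struct kvm_memslots *slots,
					  gfn_t start, gfn_t end)
{
	struct kvm_memslot_iter iter;
	unsigned long pages = 0;

	kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
		struct kvm_memory_slot *slot = iter.slot;
		gfn_t first = max_t(gfn_t, start, slot->base_gfn);
		gfn_t last = min_t(gfn_t, end, slot->base_gfn + slot->npages);

		/* Only the overlapping part of the slot counts. */
		pages += last - first;
	}

	return pages;
}
#endif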
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot * - delete an existing memory slot * - modify an existing memory slot * -- move it in the guest physical memory space * -- just change its flags * * Since flags can be changed by some of these operations, the following * differentiation is the best we can do for kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, KVM_MR_DELETE, KVM_MR_MOVE, KVM_MR_FLAGS_ONLY, }; int kvm_set_internal_memslot(struct kvm *kvm, const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, struct kvm_memory_slot *new, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); /* flush all memory translations */ void kvm_arch_flush_shadow_all(struct kvm *kvm); /* flush memory translations pointing to 'slot' */ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages); struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write); static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { return __gfn_to_page(kvm, gfn, true); } unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); static inline void kvm_release_page_unused(struct page *page) { if (!page) return; put_page(page); } void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page, bool unused, bool dirty) { lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused); if (!page) return; /* * If the page that KVM got from the *primary MMU* is writable, and KVM * installed or reused a SPTE, mark the page/folio dirty. Note, this * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if * the GFN is write-protected. Folios can't be safely marked dirty * outside of mmu_lock as doing so could race with writeback on the * folio. As a result, KVM can't mark folios dirty in the fast page * fault handler, and so KVM must (somewhat) speculatively mark the * folio dirty if KVM could locklessly make the SPTE writable.
*/ if (unused) kvm_release_page_unused(page); else if (dirty) kvm_release_page_dirty(page); else kvm_release_page_clean(page); } kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, unsigned int foll, bool *writable, struct page **refcounted_page); static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool write, bool *writable, struct page **refcounted_page) { return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, write ? FOLL_WRITE : 0, writable, refcounted_page); } int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); #define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ __ret = get_user(v, __uaddr); \ __ret; \ }) #define kvm_get_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ \ __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ offset_in_page(__gpa), v); \ }) #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ __ret = put_user(v, __uaddr); \ if (!__ret) \ mark_page_dirty(kvm, gfn); \ __ret; \ }) #define kvm_put_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ \ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ offset_in_page(__gpa), v); \ }) int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, bool writable); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map) { return __kvm_vcpu_map(vcpu, gpa, map, true); } static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map) { return __kvm_vcpu_map(vcpu, gpa, map, false); } static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu, struct kvm_host_map *map) { if (kvm_vcpu_mapped(map)) kvm_vcpu_mark_page_dirty(vcpu, map->gfn); } unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long 
kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, int len); int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len); int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len); int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, int offset, int len); int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); /** * kvm_gpc_init - initialize gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest * physical address. * * @gpc: struct gfn_to_pfn_cache object. * @gpa: guest physical address to map. * @len: sanity check; the range being accessed must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for * invalidations to be processed. Callers are required to use kvm_gpc_check() * to ensure that the cache is valid before accessing the target page. */ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. * * @gpc: struct gfn_to_pfn_cache object. * @hva: userspace virtual address to map. * @len: sanity check; the range being accessed must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an unmappable host virtual address. * * The semantics of this function are the same as those of kvm_gpc_activate(). It * merely bypasses a layer of address translation. */ int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * @len: sanity check; the range being accessed must fit a single page. * * @return: %true if the cache is still valid and the address matches. * %false if the cache is not valid. * * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock * while calling this function, and then continue to hold the lock until the * access is complete. * * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->srcu for the memslot checks. */ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * * @gpc: struct gfn_to_pfn_cache object. * @len: sanity check; the range being accessed must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful * return from this function does not mean the page can be immediately * accessed because it may have raced with an invalidation.
Callers must * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * * This removes a cache from the VM's list to be processed on MMU notifier * invocation. */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) { return gpc->active && !kvm_is_error_gpa(gpc->gpa); } static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) { return gpc->active && kvm_is_error_gpa(gpc->gpa); } void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); void kvm_vcpu_halt(struct kvm_vcpu *vcpu); bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); #ifndef CONFIG_S390 void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait); static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { __kvm_vcpu_kick(vcpu, false); } #endif int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min); int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc); void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif void kvm_mmu_invalidate_begin(struct kvm *kvm); void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); void kvm_mmu_invalidate_end(struct kvm *kvm); bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); #ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); #endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, 
struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr); int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state); #endif #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); #else static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING /* * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of * kvm_usage_count, i.e. at the beginning of the generic hardware enabling * sequence, and at the end of the generic hardware disabling sequence. */ void kvm_arch_enable_virtualization(void); void kvm_arch_disable_virtualization(void); /* * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to * do the actual twiddling of hardware bits. The hooks are called on all * online CPUs when KVM enables/disables virtualization, and on a single CPU * when that CPU is onlined/offlined (including for Resume/Suspend). */ int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* * All architectures that want to use vzalloc currently also * need their own kvm_arch_alloc_vm implementation.
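 *
 * A hypothetical arch override (sketch only) could pair something like
 *
 *	struct kvm *kvm_arch_alloc_vm(void)
 *	{
 *		return vzalloc(sizeof(struct kvm_myarch));
 *	}
 *
 * with a matching kvm_arch_free_vm() built on __kvm_arch_free_vm(), which
 * uses kvfree() and therefore handles both kmalloc'd and vmalloc'd VMs.
 * "struct kvm_myarch" is a stand-in for an arch's containing structure.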
*/ static inline struct kvm *kvm_arch_alloc_vm(void) { return kzalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT); } #endif static inline void __kvm_arch_free_vm(struct kvm *kvm) { kvfree(kvm); } #ifndef __KVM_HAVE_ARCH_VM_FREE static inline void kvm_arch_free_vm(struct kvm *kvm) { __kvm_arch_free_vm(kvm); } #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -EOPNOTSUPP; } #else int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { return -EOPNOTSUPP; } #else int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); #else static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { } static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) { } static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { return false; } #endif static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP return vcpu->arch.waitp; #else return &vcpu->wait; #endif } /* * Wake a vCPU if necessary, but don't do any stats/metadata updates. Returns * true if the vCPU was blocking and was awakened, false otherwise. */ static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); } static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu) { return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu)); } #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED /* * returns true if the virtual interrupt controller is initialized and * ready to accept virtual IRQs. On some architectures the virtual interrupt * controller is dynamically instantiated and this is not always true.
*/ bool kvm_arch_intc_initialized(struct kvm *kvm); #else static inline bool kvm_arch_intc_initialized(struct kvm *kvm) { return true; } #endif #ifdef CONFIG_GUEST_PERF_EVENTS unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); void __kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void), void (*mediated_pmi_handler)(void)); static inline void kvm_register_perf_callbacks(void) { __kvm_register_perf_callbacks(NULL, NULL); } void kvm_unregister_perf_callbacks(void); #else static inline void kvm_register_perf_callbacks(void) {} static inline void kvm_unregister_perf_callbacks(void) {} #endif /* CONFIG_GUEST_PERF_EVENTS */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi); int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. */ static inline struct kvm_memory_slot * try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { if (!slot) return NULL; if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) return slot; else return NULL; } /* * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL. * * With "approx" set returns the memslot also when the address falls * in a hole. In that case one of the memslots bordering the hole is * returned. */ static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; struct rb_node *node; int idx = slots->node_idx; slot = NULL; for (node = slots->gfn_tree.rb_node; node; ) { slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]); if (gfn >= slot->base_gfn) { if (gfn < slot->base_gfn + slot->npages) return slot; node = node->rb_right; } else node = node->rb_left; } return approx ? slot : NULL; } static inline struct kvm_memory_slot * ____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot); slot = try_get_memslot(slot, gfn); if (slot) return slot; slot = search_memslots(slots, gfn, approx); if (slot) { atomic_long_set(&slots->last_used_slot, (unsigned long)slot); return slot; } return NULL; } /* * __gfn_to_memslot() and its descendants are here to allow arch code to inline * the lookups in hot paths. 
gfn_to_memslot() itself isn't here as an inline * because that would bloat other code too much. */ static inline struct kvm_memory_slot * __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) { return ____gfn_to_memslot(slots, gfn, false); } static inline unsigned long __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { /* * The index was checked originally in search_memslots. To avoid * that a malicious guest builds a Spectre gadget out of e.g. page * table walks, do not let the processor speculate loads outside * the guest's registered memslots. */ unsigned long offset = gfn - slot->base_gfn; offset = array_index_nospec(offset, slot->npages); return slot->userspace_addr + offset * PAGE_SIZE; } static inline int memslot_id(struct kvm *kvm, gfn_t gfn) { return gfn_to_memslot(kvm, gfn)->id; } static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT; return slot->base_gfn + gfn_offset; } static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn << PAGE_SHIFT; } static inline gfn_t gpa_to_gfn(gpa_t gpa) { return (gfn_t)(gpa >> PAGE_SHIFT); } static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) { return (hpa_t)pfn << PAGE_SHIFT; } static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); return !kvm_is_error_hva(hva); } static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) { lockdep_assert_held(&gpc->lock); if (!gpc->memslot) return; mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { KVM_STAT_VM, KVM_STAT_VCPU, }; struct kvm_stat_data { struct kvm *kvm; const struct kvm_stats_desc *desc; enum kvm_stat_kind kind; }; #define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ .flags = type | unit | base | \ BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ .exponent = exp, \ .size = sz, \ .bucket_size = bsz #define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vm_stat, generic.stat), \ .name = #stat, \ } #define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vcpu_stat, generic.stat), \ .name = #stat, \ } #define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vm_stat, stat), \ .name = #stat, \ } #define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vcpu_stat, stat), \ .name = #stat, \ } /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) #define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, \ unit, base, exponent, 1, 0) #define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, \ unit, base, exponent, 1, 0) #define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, \ unit, base, exponent, 1, 0) #define STATS_DESC_LINEAR_HIST(SCOPE, name, unit, base, exponent, sz, bsz) \ STATS_DESC(SCOPE, 
name, KVM_STATS_TYPE_LINEAR_HIST, \ unit, base, exponent, sz, bsz) #define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST, \ unit, base, exponent, sz, 0) /* Cumulative counter, read/write */ #define STATS_DESC_COUNTER(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Instantaneous counter, read only */ #define STATS_DESC_ICOUNTER(SCOPE, name) \ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Peak counter, read/write */ #define STATS_DESC_PCOUNTER(SCOPE, name) \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Instantaneous boolean value, read only */ #define STATS_DESC_IBOOLEAN(SCOPE, name) \ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ KVM_STATS_BASE_POW10, 0) /* Peak (sticky) boolean value, read/write */ #define STATS_DESC_PBOOLEAN(SCOPE, name) \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ KVM_STATS_BASE_POW10, 0) /* Cumulative time in nanoseconds */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9) /* Linear histogram for time in nanoseconds */ #define STATS_DESC_LINHIST_TIME_NSEC(SCOPE, name, sz, bsz) \ STATS_DESC_LINEAR_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9, sz, bsz) /* Logarithmic histogram for time in nanoseconds */ #define STATS_DESC_LOGHIST_TIME_NSEC(SCOPE, name, sz) \ STATS_DESC_LOG_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9, sz) #define KVM_GENERIC_VM_STATS() \ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush), \ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush_requests) #define KVM_GENERIC_VCPU_STATS() \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_attempted_poll), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_wait_ns), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_success_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); /** * kvm_stats_linear_hist_update() - Update bucket value for linear histogram * statistics data. * * @data: start address of the stats data * @size: the number of buckets in the stats data * @value: the new value used to update the linear histogram's bucket * @bucket_size: the size (width) of a bucket */ static inline void kvm_stats_linear_hist_update(u64 *data, size_t size, u64 value, size_t bucket_size) { size_t index = div64_u64(value, bucket_size); index = min(index, size - 1); ++data[index]; } /** * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram * statistics data.
* * @data: start address of the stats data * @size: the number of buckets in the stats data * @value: the new value used to update the logarithmic histogram's bucket */ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) { size_t index = fls64(value); index = min(index, size - 1); ++data[index]; } #define KVM_STATS_LINEAR_HIST_UPDATE(array, value, bsize) \ kvm_stats_linear_hist_update(array, ARRAY_SIZE(array), value, bsize) #define KVM_STATS_LOG_HIST_UPDATE(array, value) \ kvm_stats_log_hist_update(array, ARRAY_SIZE(array), value) extern const struct kvm_stats_header kvm_vm_stats_header; extern const struct kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct kvm_stats_desc kvm_vcpu_stats_desc[]; #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* * Ensure the read of mmu_invalidate_in_progress happens before * the read of mmu_invalidate_seq. This interacts with the * smp_wmb() in mmu_notifier_invalidate_range_end to make sure * that the caller either sees the old (non-zero) value of * mmu_invalidate_in_progress or the new (incremented) value of * mmu_invalidate_seq. * * PowerPC Book3s HV KVM calls this under a per-page lock rather * than under kvm->mmu_lock, for scalability, so can't rely on * kvm->mmu_lock to keep things ordered. */ smp_rmb(); if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, unsigned long mmu_seq, gfn_t gfn) { lockdep_assert_held(&kvm->mmu_lock); /* * If mmu_invalidate_in_progress is non-zero, then the range maintained * by kvm_mmu_notifier_invalidate_range_start contains all addresses * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handling concurrent invalidations. */ if (unlikely(kvm->mmu_invalidate_in_progress)) { /* * Dropping mmu_lock after bumping mmu_invalidate_in_progress * but before updating the range is a KVM bug. */ if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA || kvm->mmu_invalidate_range_end == INVALID_GPA)) return 1; if (gfn >= kvm->mmu_invalidate_range_start && gfn < kvm->mmu_invalidate_range_end) return 1; } if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } /* * This lockless version of the range-based retry check *must* be paired with a * call to the locked version after acquiring mmu_lock, i.e. this is safe to * use only as a pre-check to avoid contending mmu_lock. This version *will* * get false negatives and false positives. */ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, unsigned long mmu_seq, gfn_t gfn) { /* * Use READ_ONCE() to ensure the in-progress flag and sequence counter * are always read from memory, e.g. so that checking for retry in a * loop won't result in an infinite retry loop. Don't force loads for * start+end, as the key to avoiding infinite retry loops is observing * the 1=>0 transition of in-progress, i.e. getting false negatives * due to stale start+end values is acceptable.
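 *
 * A hypothetical pairing (sketch only): bail from the lockless fast path
 * on a potential invalidation, then re-check with mmu_invalidate_retry_gfn()
 * under mmu_lock before committing the SPTE:
 *
 *	if (mmu_invalidate_retry_gfn_unsafe(kvm, mmu_seq, gfn))
 *		return MYARCH_RETRY;
 *	...
 *	write_lock(&kvm->mmu_lock);
 *	if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn))
 *		goto out_retry;
 *
 * where MYARCH_RETRY and out_retry are illustrative names.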
*/ if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && gfn >= kvm->mmu_invalidate_range_start && gfn < kvm->mmu_invalidate_range_end) return true; return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING #define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */ bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} static inline int kvm_init_irq_routing(struct kvm *kvm) { return 0; } #endif int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); bool kvm_notify_irqfd_resampler(struct kvm *kvm, unsigned int irqchip, unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { return -EINVAL; } static inline void kvm_irqfd_release(struct kvm *kvm) {} static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, unsigned int irqchip, unsigned int pin) { return false; } #endif /* CONFIG_HAVE_KVM_IRQCHIP */ void kvm_arch_irq_routing_update(struct kvm *kvm); static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Ensure the rest of the request is published to kvm_check_request's * caller. Paired with the smp_mb__after_atomic in kvm_check_request. */ smp_wmb(); set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Requests that don't require vCPU action should never be logged in * vcpu->requests. The vCPU won't clear the request, so it will stay * logged indefinitely and prevent the vCPU from entering the guest. */ BUILD_BUG_ON(!__builtin_constant_p(req) || (req & KVM_REQUEST_NO_ACTION)); __kvm_make_request(req, vcpu); } #ifndef CONFIG_S390 static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) { kvm_make_request(req, vcpu); __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT); } #endif static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); } static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) { clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { if (kvm_test_request(req, vcpu)) { kvm_clear_request(req, vcpu); /* * Ensure the rest of the request is visible to kvm_check_request's * caller. Paired with the smp_wmb in kvm_make_request.
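 *
 * Producer/consumer sketch (hypothetical request REQ_MYARCH_FOO): a
 * producer does kvm_make_request(REQ_MYARCH_FOO, vcpu) and kicks the
 * vCPU; the vCPU's run loop then consumes it with
 *
 *	if (kvm_check_request(REQ_MYARCH_FOO, vcpu))
 *		myarch_handle_foo(vcpu);
 *
 * relying on the smp_wmb()/smp_mb__after_atomic() pairing described here
 * to order the request's payload against the request bit itself.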
*/ smp_mb__after_atomic(); return true; } else { return false; } } #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool enable_virt_at_load; extern bool kvm_rebooting; #endif extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; extern unsigned int halt_poll_ns_grow_start; extern unsigned int halt_poll_ns_shrink; struct kvm_device { const struct kvm_device_ops *ops; struct kvm *kvm; void *private; struct list_head vm_node; }; /* create, destroy, and name are mandatory */ struct kvm_device_ops { const char *name; /* * create is called holding kvm->lock and any operations not suitable * to do while holding the lock should be deferred to init (see * below). */ int (*create)(struct kvm_device *dev, u32 type); /* * init is called after create if create is successful and is called * outside of holding kvm->lock. */ void (*init)(struct kvm_device *dev); /* * Destroy is responsible for freeing dev. * * Destroy may be called before or after destructors are called * on emulated I/O regions, depending on whether a reference is * held by a vcpu or other kvm component that gets destroyed * after the emulated I/O. */ void (*destroy)(struct kvm_device *dev); /* * Release is an alternative method to free the device. It is * called when the device file descriptor is closed. Once * release is called, the destroy method will not be called * anymore as the device is removed from the device list of * the VM. kvm->lock is held. */ void (*release)(struct kvm_device *dev); int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, unsigned long arg); int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) { vcpu->spin_loop.in_spin_loop = val; } static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { vcpu->spin_loop.dy_eligible = val; } #else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) { } static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) { return (memslot && memslot->id < KVM_USER_MEM_SLOTS && !(memslot->flags & KVM_MEMSLOT_INVALID)); } struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) struct kvm_kernel_irqfd; bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, struct kvm_kernel_irq_routing_entry *old, struct 
kvm_kernel_irq_routing_entry *new); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS /* If we wake up during the poll time, was it a successful poll? */ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) { return vcpu->valid_wakeup; } #else static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) { return true; } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ #ifdef CONFIG_HAVE_KVM_NO_POLL /* Callback that tells if we must not poll */ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu); #else static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { return false; } #endif /* CONFIG_HAVE_KVM_NO_POLL */ void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); #else static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) { return 0; } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ #ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) { int r = xfer_to_guest_mode_handle_work(); if (r) { WARN_ON_ONCE(r != -EINTR); kvm_handle_signal_exit(vcpu); } return r; } #endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of * the first page of a block of pages that were allocated together (i.e. * accounted together). * * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state() * is thread-safe. */ static inline void kvm_account_pgtable_pages(void *virt, int nr) { mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr); } /* * This defines how many reserved entries we want to keep before we * kick the vcpu out to userspace to avoid the dirty ring becoming full. * This value can be tuned higher if e.g. PML is enabled on the host. */ #define KVM_DIRTY_RING_RSVD_ENTRIES 64 /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, gpa_t gpa, gpa_t size, bool is_write, bool is_exec, bool is_private) { vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; vcpu->run->memory_fault.gpa = gpa; vcpu->run->memory_fault.size = size; /* RWX flags are not (yet) defined or communicated to userspace.
*/ vcpu->run->memory_fault.flags = 0; if (is_private) vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) { if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) return false; return slot->flags & KVM_MEMSLOT_GMEM_ONLY; } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { return false; } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ #ifdef CONFIG_KVM_GUEST_MEMFD int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order) { KVM_BUG_ON(1, kvm); return -EIO; } #endif /* CONFIG_KVM_GUEST_MEMFD */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * * @kvm: KVM instance * @gfn: starting GFN to be populated * @src: userspace-provided buffer containing data to copy into GFN range * (passed to @post_populate, and incremented on each iteration * if not NULL). Must be page-aligned. * @npages: number of pages to copy from userspace-buffer * @post_populate: callback to issue for each gmem page that backs the GPA * range * @opaque: opaque data to pass to @post_populate callback * * This is primarily intended for cases where a gmem-backed GPA range needs * to be initialized with userspace-provided data prior to being mapped into * the guest as a private page. This should be called with the slots->lock * held so that caller-enforced invariants regarding the expected memory * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. * * Returns the number of pages that were populated. */ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, struct page *page, void *opaque); long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); #endif #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif #ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING int kvm_enable_virtualization(void); void kvm_disable_virtualization(void); #else static inline int kvm_enable_virtualization(void) { return 0; } static inline void kvm_disable_virtualization(void) { } #endif #endif
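/*
 * A minimal usage sketch of the gfn_to_pfn_cache API documented above,
 * assuming the declared kvm_gpc_*() functions plus gpc->lock (an rwlock)
 * and gpc->khva (the cached kernel mapping); "example_gpc_read_u32" is
 * illustrative and error handling is abbreviated. This is a hedged
 * sketch, not code from the kernel tree.
 */
#if 0	/* illustrative only, never compiled */
static int example_gpc_read_u32(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
				gpa_t gpa, u32 *val)
{
	int r;

	kvm_gpc_init(gpc, kvm);			/* once, on a zeroed cache */
	r = kvm_gpc_activate(gpc, gpa, sizeof(*val));
	if (r)
		return r;

	read_lock(&gpc->lock);
	while (!kvm_gpc_check(gpc, sizeof(*val))) {
		read_unlock(&gpc->lock);
		r = kvm_gpc_refresh(gpc, sizeof(*val));
		if (r)
			goto out;
		read_lock(&gpc->lock);
	}
	*val = *(u32 *)gpc->khva;		/* access only while valid */
	read_unlock(&gpc->lock);
out:
	kvm_gpc_deactivate(gpc);
	return r;
}
#endif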
// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/stat.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/blkdev.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/compat.h> #include <linux/iversion.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <trace/events/timestamp.h> #include "internal.h" #include "mount.h" /** * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED * @stat: where to store the resulting values * @request_mask: STATX_* values requested * @inode: inode from which to grab the c/mtime * * Given @inode, grab the ctime and mtime out of it and store the result * in @stat. When fetching the value, flag it as QUERIED (if not already) * so the next write will record a distinct timestamp. * * NB: The QUERIED flag is tracked in the ctime, but we set it there even * if only the mtime was requested, as that ensures that the next mtime * change will be distinct. */ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) { atomic_t *pcn = (atomic_t *)&inode->i_ctime_nsec; /* If neither time was requested, then don't report them */ if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); return; } stat->mtime = inode_get_mtime(inode); stat->ctime.tv_sec = inode->i_ctime_sec; stat->ctime.tv_nsec = (u32)atomic_read(pcn); if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED)) stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn)); stat->ctime.tv_nsec &= ~I_CTIME_QUERIED; trace_fill_mg_cmtime(inode, &stat->ctime, &stat->mtime); } EXPORT_SYMBOL(fill_mg_cmtime); /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from * @request_mask: statx request_mask * @inode: Inode to use as the source * @stat: Where to fill in the attributes * * Fill in the basic attributes in the kstat structure from data that's to be * found on the VFS inode structure. This is the default if no getattr inode * operation is supplied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before filling in the * uid and gid fields. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap.
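 *
 * A hypothetical ->getattr() sketch (illustrative, not from any fs): fill
 * the generic fields first, then override what the filesystem tracks
 * itself, e.g.
 *
 *	generic_fillattr(idmap, request_mask, inode, stat);
 *	stat->blocks = myfs_count_blocks(inode);
 *
 * where myfs_count_blocks() stands in for fs-specific accounting.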
*/ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, struct inode *inode, struct kstat *stat) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = vfsuid_into_kuid(vfsuid); stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode_get_atime(inode); if (is_mgtime(inode)) { fill_mg_cmtime(stat, request_mask, inode); } else { stat->ctime = inode_get_ctime(inode); stat->mtime = inode_get_mtime(inode); } stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { stat->result_mask |= STATX_CHANGE_COOKIE; stat->change_cookie = inode_query_iversion(inode); } } EXPORT_SYMBOL(generic_fillattr); /** * generic_fill_statx_attr - Fill in the statx attributes from the inode flags * @inode: Inode to use as the source * @stat: Where to fill in the attribute flags * * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the * inode that are published on i_flags and enforced by the VFS. */ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) { if (inode->i_flags & S_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; if (inode->i_flags & S_APPEND) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; } EXPORT_SYMBOL(generic_fill_statx_attr); /** * generic_fill_statx_atomic_writes - Fill in atomic writes statx attributes * @stat: Where to fill in the attribute flags * @unit_min: Minimum supported atomic write length in bytes * @unit_max: Maximum supported atomic write length in bytes * @unit_max_opt: Optimised maximum supported atomic write length in bytes * * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from * atomic write unit_min and unit_max values. */ void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max, unsigned int unit_max_opt) { /* Confirm that the request type is known */ stat->result_mask |= STATX_WRITE_ATOMIC; /* Confirm that the file attribute type is known */ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC; if (unit_min) { stat->atomic_write_unit_min = unit_min; stat->atomic_write_unit_max = unit_max; stat->atomic_write_unit_max_opt = unit_max_opt; /* Initially only allow 1x segment */ stat->atomic_write_segments_max = 1; /* Confirm atomic writes are actually supported */ stat->attributes |= STATX_ATTR_WRITE_ATOMIC; } } EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the * filehandle lookup code, which uses only the inode number and returns no * attributes to any user. Any other code probably wants vfs_getattr. 
*/ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct mnt_idmap *idmap; struct inode *inode = d_backing_inode(path->dentry); memset(stat, 0, sizeof(*stat)); stat->result_mask |= STATX_BASIC_STATS; query_flags &= AT_STATX_SYNC_TYPE; /* allow the fs to override these if it really wants to */ /* SB_NOATIME means filesystem supplies dummy atime value */ if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. */ if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; if (IS_DAX(inode)) stat->attributes |= STATX_ATTR_DAX; stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) { int ret; ret = inode->i_op->getattr(idmap, path, stat, request_mask, query_flags); if (ret) return ret; } else { generic_fillattr(idmap, request_mask, inode, stat); } /* * If this is a block device inode, override the filesystem attributes * with the block device specific parameters that need to be obtained * from the bdev backing inode. */ if (S_ISBLK(stat->mode)) bdev_statx(path, stat, request_mask); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); /** * vfs_getattr - Get the enhanced basic attributes of a file * @path: The file of interest * @stat: Where to return the statistics * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Ask the filesystem for a file's attributes. The caller must indicate in * request_mask and query_flags what they want. * * If the file is remote, the filesystem can be forced to update the attributes * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can * suppress the update by passing AT_STATX_DONT_SYNC. * * Bits must have been set in request_mask to indicate which attributes the * caller wants retrieved. Any such attribute not requested may be returned * anyway, but the value may be approximate, and, if remote, may not have been * synchronised with the server. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (unlikely(retval)) return retval; return vfs_getattr_nosec(path, stat, request_mask, query_flags); } EXPORT_SYMBOL(vfs_getattr); /** * vfs_fstat - Get the basic attributes by file descriptor * @fd: The file descriptor referring to the file of interest * @stat: The result structure to fill in. * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a file descriptor to determine the file location. * * 0 will be returned on success, and a -ve error code if unsuccessful.
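 *
 * In-kernel usage sketch (hypothetical caller):
 *
 *	struct kstat st;
 *
 *	if (!vfs_fstat(fd, &st))
 *		pr_info("size=%lld\n", st.size);
 *
 * mirroring the fstat()-family syscalls below, which fill a kstat and
 * then copy it out to userspace.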
*/ int vfs_fstat(int fd, struct kstat *stat) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return -EBADF; return vfs_getattr(&fd_file(f)->f_path, stat, STATX_BASIC_STATS, 0); } static int statx_lookup_flags(int flags) { int lookup_flags = 0; if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (!(flags & AT_NO_AUTOMOUNT)) lookup_flags |= LOOKUP_AUTOMOUNT; return lookup_flags; } static int vfs_statx_path(const struct path *path, int flags, struct kstat *stat, u32 request_mask) { int error = vfs_getattr(path, stat, request_mask, flags); if (error) return error; if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; stat->result_mask |= STATX_MNT_ID_UNIQUE; } else { stat->mnt_id = real_mount(path->mnt)->mnt_id; stat->result_mask |= STATX_MNT_ID; } if (path_mounted(path)) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; return 0; } static int vfs_statx_fd(int fd, int flags, struct kstat *stat, u32 request_mask) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return -EBADF; return vfs_statx_path(&fd_file(f)->f_path, flags, stat, request_mask); } /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename * @filename: The name of the file of interest * @flags: Flags to control the query * @stat: The result structure to fill in. * @request_mask: STATX_xxx flags indicating what the caller wants * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a filename and base directory to determine the file location. * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ static int vfs_statx(int dfd, struct filename *filename, int flags, struct kstat *stat, u32 request_mask) { struct path path; unsigned int lookup_flags = statx_lookup_flags(flags); int error; if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH | AT_STATX_SYNC_TYPE)) return -EINVAL; retry: error = filename_lookup(dfd, filename, lookup_flags, &path, NULL); if (error) return error; error = vfs_statx_path(&path, flags, stat, request_mask); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { CLASS(filename_maybe_null, name)(filename, flags); if (!name && dfd >= 0) return vfs_fstat(dfd, stat); return vfs_statx(dfd, name, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } #ifdef __ARCH_WANT_OLD_STAT /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * statbuf) { static int warncount = 5; struct __old_kernel_stat tmp; if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", current->comm); } else if (warncount < 0) { /* it's laughable, but... 
*/ warncount = 0; } memset(&tmp, 0, sizeof(struct __old_kernel_stat)); tmp.st_dev = old_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(stat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_fstat(fd, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } #endif /* __ARCH_WANT_OLD_STAT */ #ifdef __ARCH_WANT_NEW_STAT #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif INIT_STRUCT_STAT_PADDING(tmp); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; #ifdef STAT_HAVE_NSEC tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; #endif tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } SYSCALL_DEFINE2(newstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #endif SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_fstat(fd, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #endif static int do_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { struct path path; int error; unsigned int lookup_flags = 0; if (bufsiz <= 0) return -EINVAL; CLASS(filename_flags, name)(pathname, LOOKUP_EMPTY); retry: error = filename_lookup(dfd, name, lookup_flags, &path, NULL); if (unlikely(error)) return error; /* * AFS mountpoints allow readlink(2) but are not symlinks */ if (d_is_symlink(path.dentry) || d_backing_inode(path.dentry)->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(&path); error = vfs_readlink(path.dentry, buf, bufsiz); } } else { error = (name->name[0] == '\0') ? -ENOENT : -EINVAL; } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, char __user *, buf, int, bufsiz) { return do_readlinkat(dfd, pathname, buf, bufsiz); } SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, int, bufsiz) { return do_readlinkat(AT_FDCWD, path, buf, bufsiz); } /* ---------- LFS-64 ----------- */ #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) #endif static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) { struct stat64 tmp; INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else tmp.st_dev = huge_encode_dev(stat->dev); tmp.st_rdev = huge_encode_dev(stat->rdev); #endif tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; #ifdef STAT64_HAS_BROKEN_ST_INO tmp.__st_ino = stat->ino; #endif tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } SYSCALL_DEFINE2(stat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(lstat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ static noinline_for_stack int cp_statx(const struct kstat *stat, struct statx __user *buffer) { struct statx tmp; memset(&tmp, 0, sizeof(tmp)); /* STATX_CHANGE_COOKIE is kernel-only for now */ tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE; tmp.stx_blksize = stat->blksize; /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */ tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.stx_mode = stat->mode; tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; tmp.stx_rdev_major = MAJOR(stat->rdev); tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; tmp.stx_subvol = stat->subvol; tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max; tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; /* * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. */ mask &= ~STATX_CHANGE_COOKIE; error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } int do_statx_fd(int fd, unsigned int flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; /* * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. 
*/ mask &= ~STATX_CHANGE_COOKIE; error = vfs_statx_fd(fd, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH * in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, unsigned int, mask, struct statx __user *, buffer) { CLASS(filename_maybe_null, name)(filename, flags); if (!name && dfd >= 0) return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer); return do_statx(dfd, name, flags, mask, buffer); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_STAT) static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, const char __user *, filename, struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_compat_stat(&stat, statbuf); } #endif COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); return error; } #endif /* Caller is here responsible for sufficient locking (ie. 
inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; if (inode->i_bytes >= 512) { inode->i_blocks++; inode->i_bytes -= 512; } } EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_add_bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks -= bytes >> 9; bytes &= 511; if (inode->i_bytes < bytes) { inode->i_blocks--; inode->i_bytes += 512; } inode->i_bytes -= bytes; } EXPORT_SYMBOL(__inode_sub_bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_sub_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_sub_bytes); loff_t inode_get_bytes(struct inode *inode) { loff_t ret; spin_lock(&inode->i_lock); ret = __inode_get_bytes(inode); spin_unlock(&inode->i_lock); return ret; } EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { /* Caller is here responsible for sufficient locking * (ie. inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } EXPORT_SYMBOL(inode_set_bytes);
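/*
 * Illustrative sketch (editor's addition, not kernel code): the pair
 * (i_blocks, i_bytes) above stores space usage as whole 512-byte sectors
 * plus a sub-sector remainder, with the invariant 0 <= i_bytes < 512 and
 * total == i_blocks * 512 + i_bytes. The standalone demo_* names below are
 * invented for illustration; the carry logic mirrors __inode_add_bytes().
 */
#include <assert.h>

struct demo_usage {
	long long blocks;	/* whole 512-byte sectors */
	unsigned short bytes;	/* remainder, always < 512 */
};

static void demo_add_bytes(struct demo_usage *u, long long nbytes)
{
	u->blocks += nbytes >> 9;	/* add the whole sectors */
	u->bytes += nbytes & 511;	/* add the remainder ... */
	if (u->bytes >= 512) {		/* ... and carry on overflow */
		u->blocks++;
		u->bytes -= 512;
	}
}

/* Example: adding 1000 bytes twice yields 3 sectors + 464 bytes (2000 total). */
static void demo_check(void)
{
	struct demo_usage u = { 0, 0 };

	demo_add_bytes(&u, 1000);	/* 1 sector + 488 bytes */
	demo_add_bytes(&u, 1000);	/* 488 + 488 = 976 -> carry */
	assert(u.blocks == 3 && u.bytes == 464);
}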
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 *
 * Copyright IBM Corporation, 2008
 *
 * Author: Dipankar Sarma <dipankar@in.ibm.com>
 *	   Paul E. McKenney <paulmck@linux.ibm.com> Hierarchical algorithm
 *
 * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *	Documentation/RCU
 */

#ifndef __LINUX_RCUTREE_H
#define __LINUX_RCUTREE_H

void rcu_softirq_qs(void);
void rcu_note_context_switch(bool preempt);
int rcu_needs_cpu(void);
void rcu_cpu_stall_reset(void);
void rcu_request_urgent_qs_task(struct task_struct *t);

/*
 * Note a virtualization-based context switch. This is simply a
 * wrapper around rcu_note_context_switch(), which allows TINY_RCU
 * to save a few bytes. The caller must have disabled interrupts.
 */
static inline void rcu_virt_note_context_switch(void)
{
	rcu_note_context_switch(false);
}

void synchronize_rcu_expedited(void);
void rcu_barrier(void);
void rcu_momentary_eqs(void);

struct rcu_gp_oldstate {
	unsigned long rgos_norm;
	unsigned long rgos_exp;
};

// Maximum number of rcu_gp_oldstate values corresponding to
// not-yet-completed RCU grace periods.
#define NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE 4

/**
 * same_state_synchronize_rcu_full - Are two old-state values identical?
 * @rgosp1: First old-state value.
 * @rgosp2: Second old-state value.
 *
 * The two old-state values must have been obtained from either
 * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
 * or get_completed_synchronize_rcu_full(). Returns @true if the two
 * values are identical and @false otherwise. This allows structures
 * whose lifetimes are tracked by old-state values to push these values
 * to a list header, allowing those structures to be slightly smaller.
 *
 * Note that equality is judged on a bitwise basis, so that an
 * @rcu_gp_oldstate structure with an already-completed state in one field
 * will compare not-equal to a structure with an already-completed state
 * in the other field. After all, the @rcu_gp_oldstate structure is opaque
 * so how did such a situation come to pass in the first place?
*/ static inline bool same_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2) { return rgosp1->rgos_norm == rgosp2->rgos_norm && rgosp1->rgos_exp == rgosp2->rgos_exp; } unsigned long start_poll_synchronize_rcu_expedited(void); void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu_expedited(unsigned long oldstate); void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp); unsigned long get_state_synchronize_rcu(void); void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); unsigned long start_poll_synchronize_rcu(void); void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); bool poll_state_synchronize_rcu(unsigned long oldstate); bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); #else static inline void rcu_irq_exit_check_preempt(void) { } #endif struct task_struct; void rcu_preempt_deferred_qs(struct task_struct *t); void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); #ifndef CONFIG_PREEMPT_RCU void rcu_all_qs(void); #endif /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); void rcutree_report_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); int rcutree_offline_cpu(unsigned int cpu); #else #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL #define rcutree_offline_cpu NULL #endif void rcutree_migrate_callbacks(int cpu); /* Called from hotplug and also arm64 early secondary boot failure */ void rcutree_report_cpu_dead(void); #endif /* __LINUX_RCUTREE_H */
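/*
 * Illustrative sketch (editor's addition, not part of this header): a
 * common pattern for the full-state polling API declared above. A writer
 * snapshots the grace-period state when an object is unlinked, and a later
 * reclaim path frees it only once a full grace period has elapsed. The
 * demo_* names are invented for illustration; kfree() would come from
 * <linux/slab.h>.
 */
struct demo_node {
	struct rcu_gp_oldstate gp_snap;
	/* payload ... */
};

/* Call after removing @p from all RCU-protected structures. */
static inline void demo_retire(struct demo_node *p)
{
	get_state_synchronize_rcu_full(&p->gp_snap);
}

/* Returns true (and frees @p) once a grace period has elapsed. */
static inline bool demo_try_free(struct demo_node *p)
{
	if (!poll_state_synchronize_rcu_full(&p->gp_snap))
		return false;	/* grace period still in progress */
	kfree(p);
	return true;
}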
// SPDX-License-Identifier: GPL-2.0+
/*
 * pcmmio.c
 * Driver for Winsystems PC-104 based multifunction IO board.
 *
 * COMEDI - Linux Control and Measurement Device Interface
 * Copyright (C) 2007 Calin A. Culianu <calin@ajvar.org>
 */

/*
 * Driver: pcmmio
 * Description: A driver for the PCM-MIO multifunction board
 * Devices: [Winsystems] PCM-MIO (pcmmio)
 * Author: Calin Culianu <calin@ajvar.org>
 * Updated: Wed, May 16 2007 16:21:10 -0500
 * Status: works
 *
 * A driver for the PCM-MIO multifunction board from Winsystems.
 * This is a PC-104 based I/O board. It contains four subdevices:
 *
 *	subdevice 0 - 16 channels of 16-bit AI
 *	subdevice 1 - 8 channels of 16-bit AO
 *	subdevice 2 - first 24 channels of the 48-channel DIO
 *		(with edge-triggered interrupt support)
 *	subdevice 3 - last 24 channels of the 48-channel DIO
 *		(no interrupt support for this bank of channels)
 *
 * Some notes:
 *
 * Synchronous reads and writes are the only things implemented for analog
 * input and output. The hardware itself can do streaming acquisition, etc.
 *
 * Asynchronous I/O for the DIO subdevices *is* implemented, however! They
 * are basically edge-triggered interrupts for any configuration of the
 * channels in subdevice 2.
 *
 * Also note that this interrupt support is untested.
 *
 * A few words about edge-detection IRQ support (commands on DIO):
 *
 * To use edge-detection IRQ support for the DIO subdevice, pass the IRQ
 * of the board to the comedi_config command. The board IRQ is not jumpered
 * but rather configured through software, so any IRQ from 1-15 is OK.
 *
 * Due to the genericity of the comedi API, you need to create a special
 * comedi_command in order to use edge-triggered interrupts for DIO.
 *
 * Use comedi_commands with TRIG_NOW. Your callback will be called each
 * time an edge is detected on the specified DIO line(s), and the data
 * values will be two sample_t's, which should be concatenated to form
 * one 32-bit unsigned int. This value is the mask of channels that had
 * edges detected from your channel list. Note that the bit positions
 * in the mask correspond to positions in your chanlist when you
 * specified the command and *not* channel id's!
 *
 * To set the polarity of the edge-detection interrupts pass a nonzero value
 * for either CR_RANGE or CR_AREF for edge-up polarity, or a zero
 * value for both CR_RANGE and CR_AREF if you want edge-down polarity.
 *
 * Configuration Options:
 *   [0] - I/O port base address
 *   [1] - IRQ (optional -- for edge-detect interrupt support only,
 *	leave out if you don't need this feature)
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/comedi/comedidev.h>

/*
 * Register I/O map
 */
#define PCMMIO_AI_LSB_REG			0x00
#define PCMMIO_AI_MSB_REG			0x01
#define PCMMIO_AI_CMD_REG			0x02
#define PCMMIO_AI_CMD_SE			BIT(7)
#define PCMMIO_AI_CMD_ODD_CHAN			BIT(6)
#define PCMMIO_AI_CMD_CHAN_SEL(x)		(((x) & 0x3) << 4)
#define PCMMIO_AI_CMD_RANGE(x)			(((x) & 0x3) << 2)
#define PCMMIO_RESOURCE_REG			0x02
#define PCMMIO_RESOURCE_IRQ(x)			(((x) & 0xf) << 0)
#define PCMMIO_AI_STATUS_REG			0x03
#define PCMMIO_AI_STATUS_DATA_READY		BIT(7)
#define PCMMIO_AI_STATUS_DATA_DMA_PEND		BIT(6)
#define PCMMIO_AI_STATUS_CMD_DMA_PEND		BIT(5)
#define PCMMIO_AI_STATUS_IRQ_PEND		BIT(4)
#define PCMMIO_AI_STATUS_DATA_DRQ_ENA		BIT(2)
#define PCMMIO_AI_STATUS_REG_SEL		BIT(3)
#define PCMMIO_AI_STATUS_CMD_DRQ_ENA		BIT(1)
#define PCMMIO_AI_STATUS_IRQ_ENA		BIT(0)
#define PCMMIO_AI_RES_ENA_REG			0x03
#define PCMMIO_AI_RES_ENA_CMD_REG_ACCESS	(0 << 3)
#define PCMMIO_AI_RES_ENA_AI_RES_ACCESS		BIT(3)
#define PCMMIO_AI_RES_ENA_DIO_RES_ACCESS	BIT(4)
#define PCMMIO_AI_2ND_ADC_OFFSET		0x04

#define PCMMIO_AO_LSB_REG			0x08
#define PCMMIO_AO_LSB_SPAN(x)			(((x) & 0xf) << 0)
#define PCMMIO_AO_MSB_REG			0x09
#define PCMMIO_AO_CMD_REG			0x0a
#define PCMMIO_AO_CMD_WR_SPAN			(0x2 << 4)
#define PCMMIO_AO_CMD_WR_CODE			(0x3 << 4)
#define PCMMIO_AO_CMD_UPDATE			(0x4 << 4)
#define PCMMIO_AO_CMD_UPDATE_ALL		(0x5 << 4)
#define PCMMIO_AO_CMD_WR_SPAN_UPDATE		(0x6 << 4)
#define PCMMIO_AO_CMD_WR_CODE_UPDATE		(0x7 << 4)
#define PCMMIO_AO_CMD_WR_SPAN_UPDATE_ALL	(0x8 << 4)
#define PCMMIO_AO_CMD_WR_CODE_UPDATE_ALL	(0x9 << 4)
#define PCMMIO_AO_CMD_RD_B1_SPAN		(0xa << 4)
#define PCMMIO_AO_CMD_RD_B1_CODE		(0xb << 4)
#define PCMMIO_AO_CMD_RD_B2_SPAN		(0xc << 4)
#define PCMMIO_AO_CMD_RD_B2_CODE		(0xd << 4)
#define PCMMIO_AO_CMD_NOP			(0xf << 4)
#define PCMMIO_AO_CMD_CHAN_SEL(x)		(((x) & 0x03) << 1)
#define PCMMIO_AO_CMD_CHAN_SEL_ALL		(0x0f << 0)
#define PCMMIO_AO_STATUS_REG			0x0b
#define PCMMIO_AO_STATUS_DATA_READY		BIT(7)
#define PCMMIO_AO_STATUS_DATA_DMA_PEND		BIT(6)
#define PCMMIO_AO_STATUS_CMD_DMA_PEND		BIT(5)
#define PCMMIO_AO_STATUS_IRQ_PEND		BIT(4)
#define PCMMIO_AO_STATUS_DATA_DRQ_ENA		BIT(2)
#define PCMMIO_AO_STATUS_REG_SEL		BIT(3)
#define PCMMIO_AO_STATUS_CMD_DRQ_ENA		BIT(1)
#define PCMMIO_AO_STATUS_IRQ_ENA		BIT(0)
#define PCMMIO_AO_RESOURCE_ENA_REG		0x0b
#define PCMMIO_AO_2ND_DAC_OFFSET		0x04

/*
 * WinSystems WS16C48
 *
 * Offset    Page 0       Page 1       Page 2       Page 3
 * ------    -----------  -----------  -----------  -----------
 * 0x10      Port 0 I/O   Port 0 I/O   Port 0 I/O   Port 0 I/O
 * 0x11      Port 1 I/O   Port 1 I/O   Port 1 I/O   Port 1 I/O
 * 0x12      Port 2 I/O   Port 2 I/O   Port 2 I/O   Port 2 I/O
 * 0x13      Port 3 I/O   Port 3 I/O   Port 3 I/O   Port 3 I/O
 * 0x14      Port 4 I/O   Port 4 I/O   Port 4 I/O   Port 4 I/O
 * 0x15      Port 5 I/O   Port 5 I/O   Port 5 I/O   Port 5 I/O
 * 0x16      INT_PENDING  INT_PENDING  INT_PENDING  INT_PENDING
 * 0x17      Page/Lock    Page/Lock    Page/Lock    Page/Lock
 * 0x18      N/A          POL_0        ENAB_0       INT_ID0
 * 0x19      N/A          POL_1        ENAB_1       INT_ID1
 * 0x1a      N/A          POL_2        ENAB_2       INT_ID2
 */
#define PCMMIO_PORT_REG(x)		(0x10 + (x))
#define PCMMIO_INT_PENDING_REG		0x16
#define PCMMIO_PAGE_LOCK_REG		0x17
#define PCMMIO_LOCK_PORT(x)		((1 << (x)) & 0x3f)
#define PCMMIO_PAGE(x)			(((x) & 0x3) << 6)
#define PCMMIO_PAGE_MASK		PCMMIO_PAGE(3)
#define PCMMIO_PAGE_POL			1
#define PCMMIO_PAGE_ENAB		2
#define PCMMIO_PAGE_INT_ID		3
#define PCMMIO_PAGE_REG(x)		(0x18 + (x))

static const struct comedi_lrange
pcmmio_ai_ranges = { 4, { BIP_RANGE(5), BIP_RANGE(10), UNI_RANGE(5), UNI_RANGE(10) } }; static const struct comedi_lrange pcmmio_ao_ranges = { 6, { UNI_RANGE(5), UNI_RANGE(10), BIP_RANGE(5), BIP_RANGE(10), BIP_RANGE(2.5), RANGE(-2.5, 7.5) } }; struct pcmmio_private { spinlock_t pagelock; /* protects the page registers */ spinlock_t spinlock; /* protects the member variables */ unsigned int enabled_mask; unsigned int active:1; }; static void pcmmio_dio_write(struct comedi_device *dev, unsigned int val, int page, int port) { struct pcmmio_private *devpriv = dev->private; unsigned long iobase = dev->iobase; unsigned long flags; spin_lock_irqsave(&devpriv->pagelock, flags); if (page == 0) { /* Port registers are valid for any page */ outb(val & 0xff, iobase + PCMMIO_PORT_REG(port + 0)); outb((val >> 8) & 0xff, iobase + PCMMIO_PORT_REG(port + 1)); outb((val >> 16) & 0xff, iobase + PCMMIO_PORT_REG(port + 2)); } else { outb(PCMMIO_PAGE(page), iobase + PCMMIO_PAGE_LOCK_REG); outb(val & 0xff, iobase + PCMMIO_PAGE_REG(0)); outb((val >> 8) & 0xff, iobase + PCMMIO_PAGE_REG(1)); outb((val >> 16) & 0xff, iobase + PCMMIO_PAGE_REG(2)); } spin_unlock_irqrestore(&devpriv->pagelock, flags); } static unsigned int pcmmio_dio_read(struct comedi_device *dev, int page, int port) { struct pcmmio_private *devpriv = dev->private; unsigned long iobase = dev->iobase; unsigned long flags; unsigned int val; spin_lock_irqsave(&devpriv->pagelock, flags); if (page == 0) { /* Port registers are valid for any page */ val = inb(iobase + PCMMIO_PORT_REG(port + 0)); val |= (inb(iobase + PCMMIO_PORT_REG(port + 1)) << 8); val |= (inb(iobase + PCMMIO_PORT_REG(port + 2)) << 16); } else { outb(PCMMIO_PAGE(page), iobase + PCMMIO_PAGE_LOCK_REG); val = inb(iobase + PCMMIO_PAGE_REG(0)); val |= (inb(iobase + PCMMIO_PAGE_REG(1)) << 8); val |= (inb(iobase + PCMMIO_PAGE_REG(2)) << 16); } spin_unlock_irqrestore(&devpriv->pagelock, flags); return val; } /* * Each channel can be individually programmed for input or output. * Writing a '0' to a channel causes the corresponding output pin * to go to a high-z state (pulled high by an external 10K resistor). * This allows it to be used as an input. When used in the input mode, * a read reflects the inverted state of the I/O pin, such that a * high on the pin will read as a '0' in the register. Writing a '1' * to a bit position causes the pin to sink current (up to 12mA), * effectively pulling it low. */ static int pcmmio_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { /* subdevice 2 uses ports 0-2, subdevice 3 uses ports 3-5 */ int port = s->index == 2 ? 0 : 3; unsigned int chanmask = (1 << s->n_chan) - 1; unsigned int mask; unsigned int val; mask = comedi_dio_update_state(s, data); if (mask) { /* * Outputs are inverted, invert the state and * update the channels. * * The s->io_bits mask makes sure the input channels * are '0' so that the outputs pins stay in a high * z-state. */ val = ~s->state & chanmask; val &= s->io_bits; pcmmio_dio_write(dev, val, 0, port); } /* get inverted state of the channels from the port */ val = pcmmio_dio_read(dev, 0, port); /* return the true state of the channels */ data[1] = ~val & chanmask; return insn->n; } static int pcmmio_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { /* subdevice 2 uses ports 0-2, subdevice 3 uses ports 3-5 */ int port = s->index == 2 ? 
0 : 3; int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; if (data[0] == INSN_CONFIG_DIO_INPUT) pcmmio_dio_write(dev, s->io_bits, 0, port); return insn->n; } static void pcmmio_reset(struct comedi_device *dev) { /* Clear all the DIO port bits */ pcmmio_dio_write(dev, 0, 0, 0); pcmmio_dio_write(dev, 0, 0, 3); /* Clear all the paged registers */ pcmmio_dio_write(dev, 0, PCMMIO_PAGE_POL, 0); pcmmio_dio_write(dev, 0, PCMMIO_PAGE_ENAB, 0); pcmmio_dio_write(dev, 0, PCMMIO_PAGE_INT_ID, 0); } /* devpriv->spinlock is already locked */ static void pcmmio_stop_intr(struct comedi_device *dev, struct comedi_subdevice *s) { struct pcmmio_private *devpriv = dev->private; devpriv->enabled_mask = 0; devpriv->active = 0; s->async->inttrig = NULL; /* disable all dio interrupts */ pcmmio_dio_write(dev, 0, PCMMIO_PAGE_ENAB, 0); } static void pcmmio_handle_dio_intr(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int triggered) { struct pcmmio_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned int val = 0; unsigned long flags; int i; spin_lock_irqsave(&devpriv->spinlock, flags); if (!devpriv->active) goto done; if (!(triggered & devpriv->enabled_mask)) goto done; for (i = 0; i < cmd->chanlist_len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); if (triggered & (1 << chan)) val |= (1 << i); } comedi_buf_write_samples(s, &val, 1); if (cmd->stop_src == TRIG_COUNT && s->async->scans_done >= cmd->stop_arg) s->async->events |= COMEDI_CB_EOA; done: spin_unlock_irqrestore(&devpriv->spinlock, flags); comedi_handle_events(dev, s); } static irqreturn_t interrupt_pcmmio(int irq, void *d) { struct comedi_device *dev = d; struct comedi_subdevice *s = dev->read_subdev; unsigned int triggered; unsigned char int_pend; /* are there any interrupts pending */ int_pend = inb(dev->iobase + PCMMIO_INT_PENDING_REG) & 0x07; if (!int_pend) return IRQ_NONE; /* get, and clear, the pending interrupts */ triggered = pcmmio_dio_read(dev, PCMMIO_PAGE_INT_ID, 0); pcmmio_dio_write(dev, 0, PCMMIO_PAGE_INT_ID, 0); pcmmio_handle_dio_intr(dev, s, triggered); return IRQ_HANDLED; } /* devpriv->spinlock is already locked */ static void pcmmio_start_intr(struct comedi_device *dev, struct comedi_subdevice *s) { struct pcmmio_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned int bits = 0; unsigned int pol_bits = 0; int i; devpriv->enabled_mask = 0; devpriv->active = 1; if (cmd->chanlist) { for (i = 0; i < cmd->chanlist_len; i++) { unsigned int chanspec = cmd->chanlist[i]; unsigned int chan = CR_CHAN(chanspec); unsigned int range = CR_RANGE(chanspec); unsigned int aref = CR_AREF(chanspec); bits |= (1 << chan); pol_bits |= (((aref || range) ? 
1 : 0) << chan); } } bits &= ((1 << s->n_chan) - 1); devpriv->enabled_mask = bits; /* set polarity and enable interrupts */ pcmmio_dio_write(dev, pol_bits, PCMMIO_PAGE_POL, 0); pcmmio_dio_write(dev, bits, PCMMIO_PAGE_ENAB, 0); } static int pcmmio_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct pcmmio_private *devpriv = dev->private; unsigned long flags; spin_lock_irqsave(&devpriv->spinlock, flags); if (devpriv->active) pcmmio_stop_intr(dev, s); spin_unlock_irqrestore(&devpriv->spinlock, flags); return 0; } static int pcmmio_inttrig_start_intr(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct pcmmio_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned long flags; if (trig_num != cmd->start_arg) return -EINVAL; spin_lock_irqsave(&devpriv->spinlock, flags); s->async->inttrig = NULL; if (devpriv->active) pcmmio_start_intr(dev, s); spin_unlock_irqrestore(&devpriv->spinlock, flags); return 1; } /* * 'do_cmd' function for an 'INTERRUPT' subdevice. */ static int pcmmio_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct pcmmio_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned long flags; spin_lock_irqsave(&devpriv->spinlock, flags); devpriv->active = 1; /* Set up start of acquisition. */ if (cmd->start_src == TRIG_INT) s->async->inttrig = pcmmio_inttrig_start_intr; else /* TRIG_NOW */ pcmmio_start_intr(dev, s); spin_unlock_irqrestore(&devpriv->spinlock, flags); return 0; } static int pcmmio_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_EXT); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, 0); err |= comedi_check_trigger_arg_is(&cmd->convert_arg, 0); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; /* step 4: fix up any arguments */ /* if (err) return 4; */ return 0; } static int pcmmio_ai_eoc(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned long context) { unsigned char status; status = inb(dev->iobase + PCMMIO_AI_STATUS_REG); if (status & PCMMIO_AI_STATUS_DATA_READY) return 0; return -EBUSY; } static int pcmmio_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned long iobase = dev->iobase; unsigned int chan = CR_CHAN(insn->chanspec); unsigned int range = CR_RANGE(insn->chanspec); unsigned int aref = CR_AREF(insn->chanspec); unsigned char cmd = 0; unsigned int val; int ret; int i; /* * The PCM-MIO uses two Linear Tech LTC1859CG 8-channel A/D converters. 
* The devices use a full duplex serial interface which transmits and * receives data simultaneously. An 8-bit command is shifted into the * ADC interface to configure it for the next conversion. At the same * time, the data from the previous conversion is shifted out of the * device. Consequently, the conversion result is delayed by one * conversion from the command word. * * Setup the cmd for the conversions then do a dummy conversion to * flush the junk data. Then do each conversion requested by the * comedi_insn. Note that the last conversion will leave junk data * in ADC which will get flushed on the next comedi_insn. */ if (chan > 7) { chan -= 8; iobase += PCMMIO_AI_2ND_ADC_OFFSET; } if (aref == AREF_GROUND) cmd |= PCMMIO_AI_CMD_SE; if (chan % 2) cmd |= PCMMIO_AI_CMD_ODD_CHAN; cmd |= PCMMIO_AI_CMD_CHAN_SEL(chan / 2); cmd |= PCMMIO_AI_CMD_RANGE(range); outb(cmd, iobase + PCMMIO_AI_CMD_REG); ret = comedi_timeout(dev, s, insn, pcmmio_ai_eoc, 0); if (ret) return ret; val = inb(iobase + PCMMIO_AI_LSB_REG); val |= inb(iobase + PCMMIO_AI_MSB_REG) << 8; for (i = 0; i < insn->n; i++) { outb(cmd, iobase + PCMMIO_AI_CMD_REG); ret = comedi_timeout(dev, s, insn, pcmmio_ai_eoc, 0); if (ret) return ret; val = inb(iobase + PCMMIO_AI_LSB_REG); val |= inb(iobase + PCMMIO_AI_MSB_REG) << 8; /* bipolar data is two's complement */ if (comedi_range_is_bipolar(s, range)) val = comedi_offset_munge(s, val); data[i] = val; } return insn->n; } static int pcmmio_ao_eoc(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned long context) { unsigned char status; status = inb(dev->iobase + PCMMIO_AO_STATUS_REG); if (status & PCMMIO_AO_STATUS_DATA_READY) return 0; return -EBUSY; } static int pcmmio_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned long iobase = dev->iobase; unsigned int chan = CR_CHAN(insn->chanspec); unsigned int range = CR_RANGE(insn->chanspec); unsigned char cmd = 0; int ret; int i; /* * The PCM-MIO has two Linear Tech LTC2704 DAC devices. Each device * is a 4-channel converter with software-selectable output range. 
*/ if (chan > 3) { cmd |= PCMMIO_AO_CMD_CHAN_SEL(chan - 4); iobase += PCMMIO_AO_2ND_DAC_OFFSET; } else { cmd |= PCMMIO_AO_CMD_CHAN_SEL(chan); } /* set the range for the channel */ outb(PCMMIO_AO_LSB_SPAN(range), iobase + PCMMIO_AO_LSB_REG); outb(0, iobase + PCMMIO_AO_MSB_REG); outb(cmd | PCMMIO_AO_CMD_WR_SPAN_UPDATE, iobase + PCMMIO_AO_CMD_REG); ret = comedi_timeout(dev, s, insn, pcmmio_ao_eoc, 0); if (ret) return ret; for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; /* write the data to the channel */ outb(val & 0xff, iobase + PCMMIO_AO_LSB_REG); outb((val >> 8) & 0xff, iobase + PCMMIO_AO_MSB_REG); outb(cmd | PCMMIO_AO_CMD_WR_CODE_UPDATE, iobase + PCMMIO_AO_CMD_REG); ret = comedi_timeout(dev, s, insn, pcmmio_ao_eoc, 0); if (ret) return ret; s->readback[chan] = val; } return insn->n; } static int pcmmio_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct pcmmio_private *devpriv; struct comedi_subdevice *s; int ret; ret = comedi_request_region(dev, it->options[0], 32); if (ret) return ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; spin_lock_init(&devpriv->pagelock); spin_lock_init(&devpriv->spinlock); pcmmio_reset(dev); if (it->options[1]) { ret = request_irq(it->options[1], interrupt_pcmmio, 0, dev->board_name, dev); if (ret == 0) { dev->irq = it->options[1]; /* configure the interrupt routing on the board */ outb(PCMMIO_AI_RES_ENA_DIO_RES_ACCESS, dev->iobase + PCMMIO_AI_RES_ENA_REG); outb(PCMMIO_RESOURCE_IRQ(dev->irq), dev->iobase + PCMMIO_RESOURCE_REG); } } ret = comedi_alloc_subdevices(dev, 4); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_DIFF; s->n_chan = 16; s->maxdata = 0xffff; s->range_table = &pcmmio_ai_ranges; s->insn_read = pcmmio_ai_insn_read; /* initialize the resource enable register by clearing it */ outb(PCMMIO_AI_RES_ENA_CMD_REG_ACCESS, dev->iobase + PCMMIO_AI_RES_ENA_REG); outb(PCMMIO_AI_RES_ENA_CMD_REG_ACCESS, dev->iobase + PCMMIO_AI_RES_ENA_REG + PCMMIO_AI_2ND_ADC_OFFSET); /* Analog Output subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_READABLE; s->n_chan = 8; s->maxdata = 0xffff; s->range_table = &pcmmio_ao_ranges; s->insn_write = pcmmio_ao_insn_write; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; /* initialize the resource enable register by clearing it */ outb(0, dev->iobase + PCMMIO_AO_RESOURCE_ENA_REG); outb(0, dev->iobase + PCMMIO_AO_2ND_DAC_OFFSET + PCMMIO_AO_RESOURCE_ENA_REG); /* Digital I/O subdevice with interrupt support */ s = &dev->subdevices[2]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->len_chanlist = 1; s->range_table = &range_digital; s->insn_bits = pcmmio_dio_insn_bits; s->insn_config = pcmmio_dio_insn_config; if (dev->irq) { dev->read_subdev = s; s->subdev_flags |= SDF_CMD_READ | SDF_LSAMPL | SDF_PACKED; s->len_chanlist = s->n_chan; s->cancel = pcmmio_cancel; s->do_cmd = pcmmio_cmd; s->do_cmdtest = pcmmio_cmdtest; } /* Digital I/O subdevice */ s = &dev->subdevices[3]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = pcmmio_dio_insn_bits; s->insn_config = pcmmio_dio_insn_config; return 0; } static struct comedi_driver pcmmio_driver = { .driver_name = "pcmmio", .module = THIS_MODULE, .attach = pcmmio_attach, .detach = comedi_legacy_detach, }; 
module_comedi_driver(pcmmio_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for Winsystems PCM-MIO PC/104 board"); MODULE_LICENSE("GPL");
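/*
 * Illustrative userspace sketch (editor's addition, not part of the
 * driver): one way to set up the edge-detection command described in the
 * driver comment above, using comedilib. The device path /dev/comedi0 and
 * the choice of two channels are assumptions for the example; the command
 * fields follow the constraints checked by pcmmio_cmdtest().
 */
#include <stdio.h>
#include <comedilib.h>

int main(void)
{
	comedi_t *dev = comedi_open("/dev/comedi0");
	unsigned int chanlist[2];
	comedi_cmd cmd = { 0 };

	if (!dev)
		return 1;

	/* Nonzero CR_RANGE (or CR_AREF) selects edge-up polarity. */
	chanlist[0] = CR_PACK(0, 1, 0);	/* channel 0, edge-up */
	chanlist[1] = CR_PACK(1, 0, 0);	/* channel 1, edge-down */

	cmd.subdev = 2;			/* DIO subdevice with IRQ support */
	cmd.start_src = TRIG_NOW;
	cmd.scan_begin_src = TRIG_EXT;	/* scans are driven by edge IRQs */
	cmd.convert_src = TRIG_NOW;
	cmd.scan_end_src = TRIG_COUNT;
	cmd.scan_end_arg = 2;		/* must equal chanlist_len */
	cmd.stop_src = TRIG_NONE;	/* run until cancelled */
	cmd.chanlist = chanlist;
	cmd.chanlist_len = 2;

	if (comedi_command(dev, &cmd) < 0)
		comedi_perror("comedi_command");

	comedi_close(dev);
	return 0;
}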
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/addr.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
#include <trace/events/sunrpc.h>

#define RPCDBG_FACILITY	RPCDBG_AUTH

#include "netns.h"

/*
 * AUTHUNIX and AUTHNULL credentials are both handled here.
 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
 * are always nobody (-2). i.e. we do the same IP address checks for
 * AUTHNULL as for AUTHUNIX, and that is done here.
 */

struct unix_domain {
	struct auth_domain h;
	/* other stuff later */
};

extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
extern struct auth_ops svcauth_tls;

static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
	struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
	struct unix_domain *ud = container_of(dom, struct unix_domain, h);

	kfree(dom->name);
	kfree(ud);
}

static void svcauth_unix_domain_release(struct auth_domain *dom)
{
	call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu);
}

struct auth_domain *unix_domain_find(char *name)
{
	struct auth_domain *rv;
	struct unix_domain *new = NULL;

	rv = auth_domain_find(name);
	while (1) {
		if (rv) {
			if (new && rv != &new->h)
				svcauth_unix_domain_release(&new->h);
			if (rv->flavour != &svcauth_unix) {
				auth_domain_put(rv);
				return NULL;
			}
			return rv;
		}
		new = kmalloc(sizeof(*new), GFP_KERNEL);
		if (new == NULL)
			return NULL;
		kref_init(&new->h.ref);
		new->h.name = kstrdup(name, GFP_KERNEL);
		if (new->h.name == NULL) {
			kfree(new);
			return NULL;
		}
		new->h.flavour = &svcauth_unix;
		rv = auth_domain_lookup(name, &new->h);
	}
}
EXPORT_SYMBOL_GPL(unix_domain_find);

/**************************************************
 * cache for IP address to unix_domain
 * as needed by AUTH_UNIX
 */
#define IP_HASHBITS	8
#define IP_HASHMAX	(1<<IP_HASHBITS)

struct ip_map {
	struct cache_head h;
	char m_class[8]; /* e.g.
"nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; struct rcu_head m_rcu; }; static void ip_map_put(struct kref *kref) { struct cache_head *item = container_of(kref, struct cache_head, ref); struct ip_map *im = container_of(item, struct ip_map,h); if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) auth_domain_put(&im->m_client->h); kfree_rcu(im, m_rcu); } static inline int hash_ip6(const struct in6_addr *ip) { return hash_32(ipv6_addr_hash(ip), IP_HASHBITS); } static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); return strcmp(orig->m_class, new->m_class) == 0 && ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { struct ip_map *new = container_of(cnew, struct ip_map, h); struct ip_map *item = container_of(citem, struct ip_map, h); strcpy(new->m_class, item->m_class); new->m_addr = item->m_addr; } static void update(struct cache_head *cnew, struct cache_head *citem) { struct ip_map *new = container_of(cnew, struct ip_map, h); struct ip_map *item = container_of(citem, struct ip_map, h); kref_get(&item->m_client->h.ref); new->m_client = item->m_client; } static struct cache_head *ip_map_alloc(void) { struct ip_map *i = kmalloc(sizeof(*i), GFP_KERNEL); if (i) return &i->h; else return NULL; } static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) { return sunrpc_cache_pipe_upcall(cd, h); } static void ip_map_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { char text_addr[40]; struct ip_map *im = container_of(h, struct ip_map, h); if (ipv6_addr_v4mapped(&(im->m_addr))) { snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]); } else { snprintf(text_addr, 40, "%pI6", &im->m_addr); } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); (*bpp)[-1] = '\n'; } static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time64_t expiry); static int ip_map_parse(struct cache_detail *cd, char *mesg, int mlen) { /* class ipaddress [domainname] */ /* should be safe just to use the start of the input buffer * for scratch: */ char *buf = mesg; int len; char class[8]; union { struct sockaddr sa; struct sockaddr_in s4; struct sockaddr_in6 s6; } address; struct sockaddr_in6 sin6; int err; struct ip_map *ipmp; struct auth_domain *dom; time64_t expiry; if (mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; /* class */ len = qword_get(&mesg, class, sizeof(class)); if (len <= 0) return -EINVAL; /* ip address */ len = qword_get(&mesg, buf, mlen); if (len <= 0) return -EINVAL; if (rpc_pton(cd->net, buf, len, &address.sa, sizeof(address)) == 0) return -EINVAL; switch (address.sa.sa_family) { case AF_INET: /* Form a mapped IPv4 address in sin6 */ sin6.sin6_family = AF_INET6; ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr, &sin6.sin6_addr); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(&sin6, &address.s6, sizeof(sin6)); break; #endif default: return -EINVAL; } err = get_expiry(&mesg, &expiry); if (err) return err; /* domainname, or empty for NEGATIVE */ len = qword_get(&mesg, buf, mlen); if (len < 0) return -EINVAL; if (len) { dom = unix_domain_find(buf); if (dom == NULL) return -ENOENT; } else dom = NULL; /* IPv6 scope IDs are ignored for 
now */ ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr); if (ipmp) { err = __ip_map_update(cd, ipmp, container_of(dom, struct unix_domain, h), expiry); } else err = -ENOMEM; if (dom) auth_domain_put(dom); cache_flush(); return err; } static int ip_map_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { struct ip_map *im; struct in6_addr addr; char *dom = "-no-domain-"; if (h == NULL) { seq_puts(m, "#class IP domain\n"); return 0; } im = container_of(h, struct ip_map, h); /* class addr domain */ addr = im->m_addr; if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) dom = im->m_client->h.name; if (ipv6_addr_v4mapped(&addr)) { seq_printf(m, "%s %pI4 %s\n", im->m_class, &addr.s6_addr32[3], dom); } else { seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom); } return 0; } static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr) { struct ip_map ip; struct cache_head *ch; strcpy(ip.m_class, class); ip.m_addr = *addr; ch = sunrpc_cache_lookup_rcu(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ hash_ip6(addr)); if (ch) return container_of(ch, struct ip_map, h); else return NULL; } static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time64_t expiry) { struct ip_map ip; struct cache_head *ch; ip.m_client = udom; ip.h.flags = 0; if (!udom) set_bit(CACHE_NEGATIVE, &ip.h.flags); ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ hash_ip6(&ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, cd); return 0; } void svcauth_unix_purge(struct net *net) { struct sunrpc_net *sn; sn = net_generic(net, sunrpc_net_id); cache_purge(sn->ip_map_cache); } EXPORT_SYMBOL_GPL(svcauth_unix_purge); static inline struct ip_map * ip_map_cached_get(struct svc_xprt *xprt) { struct ip_map *ipm = NULL; struct sunrpc_net *sn; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); ipm = xprt->xpt_auth_cache; if (ipm != NULL) { sn = net_generic(xprt->xpt_net, sunrpc_net_id); if (cache_is_expired(sn->ip_map_cache, &ipm->h)) { /* * The entry has been invalidated since it was * remembered, e.g. by a second mount from the * same IP address. 
*/ xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); cache_put(&ipm->h, sn->ip_map_cache); return NULL; } cache_get(&ipm->h); } spin_unlock(&xprt->xpt_lock); } return ipm; } static inline void ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm) { if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { spin_lock(&xprt->xpt_lock); if (xprt->xpt_auth_cache == NULL) { /* newly cached, keep the reference */ xprt->xpt_auth_cache = ipm; ipm = NULL; } spin_unlock(&xprt->xpt_lock); } if (ipm) { struct sunrpc_net *sn; sn = net_generic(xprt->xpt_net, sunrpc_net_id); cache_put(&ipm->h, sn->ip_map_cache); } } void svcauth_unix_info_release(struct svc_xprt *xpt) { struct ip_map *ipm; ipm = xpt->xpt_auth_cache; if (ipm != NULL) { struct sunrpc_net *sn; sn = net_generic(xpt->xpt_net, sunrpc_net_id); cache_put(&ipm->h, sn->ip_map_cache); } } /**************************************************************************** * auth.unix.gid cache * simple cache to map a UID to a list of GIDs * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS */ #define GID_HASHBITS 8 #define GID_HASHMAX (1<<GID_HASHBITS) struct unix_gid { struct cache_head h; kuid_t uid; struct group_info *gi; struct rcu_head rcu; }; static int unix_gid_hash(kuid_t uid) { return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); } static void unix_gid_free(struct rcu_head *rcu) { struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu); struct cache_head *item = &ug->h; if (test_bit(CACHE_VALID, &item->flags) && !test_bit(CACHE_NEGATIVE, &item->flags)) put_group_info(ug->gi); kfree(ug); } static void unix_gid_put(struct kref *kref) { struct cache_head *item = container_of(kref, struct cache_head, ref); struct unix_gid *ug = container_of(item, struct unix_gid, h); call_rcu(&ug->rcu, unix_gid_free); } static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) { struct unix_gid *orig = container_of(corig, struct unix_gid, h); struct unix_gid *new = container_of(cnew, struct unix_gid, h); return uid_eq(orig->uid, new->uid); } static void unix_gid_init(struct cache_head *cnew, struct cache_head *citem) { struct unix_gid *new = container_of(cnew, struct unix_gid, h); struct unix_gid *item = container_of(citem, struct unix_gid, h); new->uid = item->uid; } static void unix_gid_update(struct cache_head *cnew, struct cache_head *citem) { struct unix_gid *new = container_of(cnew, struct unix_gid, h); struct unix_gid *item = container_of(citem, struct unix_gid, h); get_group_info(item->gi); new->gi = item->gi; } static struct cache_head *unix_gid_alloc(void) { struct unix_gid *g = kmalloc(sizeof(*g), GFP_KERNEL); if (g) return &g->h; else return NULL; } static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) { return sunrpc_cache_pipe_upcall_timeout(cd, h); } static void unix_gid_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { char tuid[20]; struct unix_gid *ug = container_of(h, struct unix_gid, h); snprintf(tuid, 20, "%u", from_kuid(&init_user_ns, ug->uid)); qword_add(bpp, blen, tuid); (*bpp)[-1] = '\n'; } static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid); static int unix_gid_parse(struct cache_detail *cd, char *mesg, int mlen) { /* uid expiry Ngid gid0 gid1 ... 
gidN-1 */ int id; kuid_t uid; int gids; int rv; int i; int err; time64_t expiry; struct unix_gid ug, *ugp; if (mesg[mlen - 1] != '\n') return -EINVAL; mesg[mlen-1] = 0; rv = get_int(&mesg, &id); if (rv) return -EINVAL; uid = make_kuid(current_user_ns(), id); ug.uid = uid; err = get_expiry(&mesg, &expiry); if (err) return err; rv = get_int(&mesg, &gids); if (rv || gids < 0 || gids > 8192) return -EINVAL; ug.gi = groups_alloc(gids); if (!ug.gi) return -ENOMEM; for (i = 0 ; i < gids ; i++) { int gid; kgid_t kgid; rv = get_int(&mesg, &gid); err = -EINVAL; if (rv) goto out; kgid = make_kgid(current_user_ns(), gid); if (!gid_valid(kgid)) goto out; ug.gi->gid[i] = kgid; } groups_sort(ug.gi); ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; ug.h.flags = 0; ug.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ug.h, &ugp->h, unix_gid_hash(uid)); if (!ch) err = -ENOMEM; else { err = 0; cache_put(ch, cd); } } else err = -ENOMEM; out: if (ug.gi) put_group_info(ug.gi); return err; } static int unix_gid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { struct user_namespace *user_ns = m->file->f_cred->user_ns; struct unix_gid *ug; int i; int glen; if (h == NULL) { seq_puts(m, "#uid cnt: gids...\n"); return 0; } ug = container_of(h, struct unix_gid, h); if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) glen = ug->gi->ngroups; else glen = 0; seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen); for (i = 0; i < glen; i++) seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i])); seq_printf(m, "\n"); return 0; } static const struct cache_detail unix_gid_cache_template = { .owner = THIS_MODULE, .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, .cache_upcall = unix_gid_upcall, .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, .init = unix_gid_init, .update = unix_gid_update, .alloc = unix_gid_alloc, }; int unix_gid_cache_create(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd; int err; cd = cache_create_net(&unix_gid_cache_template, net); if (IS_ERR(cd)) return PTR_ERR(cd); err = cache_register_net(cd, net); if (err) { cache_destroy_net(cd, net); return err; } sn->unix_gid_cache = cd; return 0; } void unix_gid_cache_destroy(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd = sn->unix_gid_cache; sn->unix_gid_cache = NULL; cache_purge(cd); cache_unregister_net(cd, net); cache_destroy_net(cd, net); } static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid) { struct unix_gid ug; struct cache_head *ch; ug.uid = uid; ch = sunrpc_cache_lookup_rcu(cd, &ug.h, unix_gid_hash(uid)); if (ch) return container_of(ch, struct unix_gid, h); else return NULL; } static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp) { struct unix_gid *ug; struct group_info *gi; int ret; struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id); ug = unix_gid_lookup(sn->unix_gid_cache, uid); if (!ug) return ERR_PTR(-EAGAIN); ret = cache_check(sn->unix_gid_cache, &ug->h, &rqstp->rq_chandle); switch (ret) { case -ENOENT: return ERR_PTR(-ENOENT); case -ETIMEDOUT: return ERR_PTR(-ESHUTDOWN); case 0: gi = get_group_info(ug->gi); cache_put(&ug->h, sn->unix_gid_cache); return gi; default: return ERR_PTR(-EAGAIN); } } enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp) { struct sockaddr_in 
*sin; struct sockaddr_in6 *sin6, sin6_storage; struct ip_map *ipm; struct group_info *gi; struct svc_cred *cred = &rqstp->rq_cred; struct svc_xprt *xprt = rqstp->rq_xprt; struct net *net = xprt->xpt_net; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); switch (rqstp->rq_addr.ss_family) { case AF_INET: sin = svc_addr_in(rqstp); sin6 = &sin6_storage; ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr); break; case AF_INET6: sin6 = svc_addr_in6(rqstp); break; default: BUG(); } rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) goto out; rqstp->rq_auth_stat = rpc_autherr_badcred; ipm = ip_map_cached_get(xprt); if (ipm == NULL) ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_programs->pg_class, &sin6->sin6_addr); if (ipm == NULL) return SVC_DENIED; switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { default: BUG(); case -ETIMEDOUT: return SVC_CLOSE; case -EAGAIN: return SVC_DROP; case -ENOENT: return SVC_DENIED; case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); ip_map_cached_put(xprt, ipm); break; } gi = unix_gid_find(cred->cr_uid, rqstp); switch (PTR_ERR(gi)) { case -EAGAIN: return SVC_DROP; case -ESHUTDOWN: return SVC_CLOSE; case -ENOENT: break; default: put_group_info(cred->cr_group_info); cred->cr_group_info = gi; } out: rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } EXPORT_SYMBOL_GPL(svcauth_unix_set_client); /** * svcauth_null_accept - Decode and validate incoming RPC_AUTH_NULL credential * @rqstp: RPC transaction * * Return values: * %SVC_OK: Both credential and verifier are valid * %SVC_DENIED: Credential or verifier is not valid * %SVC_GARBAGE: Failed to decode credential or verifier * %SVC_CLOSE: Temporary failure * * rqstp->rq_auth_stat is set as mandated by RFC 5531. 
*/ static enum svc_auth_status svcauth_null_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; u32 flavor, len; void *body; /* Length of Call's credential body field: */ if (xdr_stream_decode_u32(xdr, &len) < 0) return SVC_GARBAGE; if (len != 0) { rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } /* Call's verf field: */ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0) return SVC_GARBAGE; if (flavor != RPC_AUTH_NULL || len != 0) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } /* Signal that mapping to nobody uid/gid is required */ cred->cr_uid = INVALID_UID; cred->cr_gid = INVALID_GID; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) return SVC_CLOSE; /* kmalloc failure - client must retry */ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; if (!svcxdr_set_accept_stat(rqstp)) return SVC_CLOSE; rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL; return SVC_OK; } static int svcauth_null_release(struct svc_rqst *rqstp) { if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; return 0; /* don't drop */ } struct auth_ops svcauth_null = { .name = "null", .owner = THIS_MODULE, .flavour = RPC_AUTH_NULL, .accept = svcauth_null_accept, .release = svcauth_null_release, .set_client = svcauth_unix_set_client, }; /** * svcauth_tls_accept - Decode and validate incoming RPC_AUTH_TLS credential * @rqstp: RPC transaction * * Return values: * %SVC_OK: Both credential and verifier are valid * %SVC_DENIED: Credential or verifier is not valid * %SVC_GARBAGE: Failed to decode credential or verifier * %SVC_CLOSE: Temporary failure * * rqstp->rq_auth_stat is set as mandated by RFC 5531. 
*/ static enum svc_auth_status svcauth_tls_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; struct svc_xprt *xprt = rqstp->rq_xprt; u32 flavor, len; void *body; __be32 *p; /* Length of Call's credential body field: */ if (xdr_stream_decode_u32(xdr, &len) < 0) return SVC_GARBAGE; if (len != 0) { rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } /* Call's verf field: */ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0) return SVC_GARBAGE; if (flavor != RPC_AUTH_NULL || len != 0) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } /* AUTH_TLS is not valid on non-NULL procedures */ if (rqstp->rq_proc != 0) { rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } /* Signal that mapping to nobody uid/gid is required */ cred->cr_uid = INVALID_UID; cred->cr_gid = INVALID_GID; cred->cr_group_info = groups_alloc(0); if (cred->cr_group_info == NULL) return SVC_CLOSE; if (xprt->xpt_ops->xpo_handshake) { p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); if (!p) return SVC_CLOSE; trace_svc_tls_start(xprt); *p++ = rpc_auth_null; *p++ = cpu_to_be32(8); memcpy(p, "STARTTLS", 8); set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); } else { trace_svc_tls_unavailable(xprt); if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; } if (!svcxdr_set_accept_stat(rqstp)) return SVC_CLOSE; rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS; return SVC_OK; } struct auth_ops svcauth_tls = { .name = "tls", .owner = THIS_MODULE, .flavour = RPC_AUTH_TLS, .accept = svcauth_tls_accept, .release = svcauth_null_release, .set_client = svcauth_unix_set_client, }; /** * svcauth_unix_accept - Decode and validate incoming RPC_AUTH_SYS credential * @rqstp: RPC transaction * * Return values: * %SVC_OK: Both credential and verifier are valid * %SVC_DENIED: Credential or verifier is not valid * %SVC_GARBAGE: Failed to decode credential or verifier * %SVC_CLOSE: Temporary failure * * rqstp->rq_auth_stat is set as mandated by RFC 5531. */ static enum svc_auth_status svcauth_unix_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; struct user_namespace *userns; u32 flavor, len, i; void *body; __be32 *p; /* * This implementation ignores the length of the Call's * credential body field and the timestamp and machinename * fields. */ p = xdr_inline_decode(xdr, XDR_UNIT * 3); if (!p) return SVC_GARBAGE; len = be32_to_cpup(p + 2); if (len > RPC_MAX_MACHINENAME) return SVC_GARBAGE; if (!xdr_inline_decode(xdr, len)) return SVC_GARBAGE; /* * Note: we skip uid_valid()/gid_valid() checks here for * backwards compatibility with clients that use -1 id's. * Instead, -1 uid or gid is later mapped to the * (export-specific) anonymous id by nfsd_setuser. * Supplementary gid's will be left alone. */ userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ? 
rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns; if (xdr_stream_decode_u32(xdr, &i) < 0) return SVC_GARBAGE; cred->cr_uid = make_kuid(userns, i); if (xdr_stream_decode_u32(xdr, &i) < 0) return SVC_GARBAGE; cred->cr_gid = make_kgid(userns, i); if (xdr_stream_decode_u32(xdr, &len) < 0) return SVC_GARBAGE; if (len > UNX_NGROUPS) goto badcred; p = xdr_inline_decode(xdr, XDR_UNIT * len); if (!p) return SVC_GARBAGE; cred->cr_group_info = groups_alloc(len); if (cred->cr_group_info == NULL) return SVC_CLOSE; for (i = 0; i < len; i++) { kgid_t kgid = make_kgid(userns, be32_to_cpup(p++)); cred->cr_group_info->gid[i] = kgid; } groups_sort(cred->cr_group_info); /* Call's verf field: */ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0) return SVC_GARBAGE; if (flavor != RPC_AUTH_NULL || len != 0) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; if (!svcxdr_set_accept_stat(rqstp)) return SVC_CLOSE; rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX; return SVC_OK; badcred: rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } static int svcauth_unix_release(struct svc_rqst *rqstp) { /* Verifier (such as it is) is already in place. */ if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; return 0; } struct auth_ops svcauth_unix = { .name = "unix", .owner = THIS_MODULE, .flavour = RPC_AUTH_UNIX, .accept = svcauth_unix_accept, .release = svcauth_unix_release, .domain_release = svcauth_unix_domain_release, .set_client = svcauth_unix_set_client, }; static const struct cache_detail ip_map_cache_template = { .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, .cache_upcall = ip_map_upcall, .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, .init = ip_map_init, .update = update, .alloc = ip_map_alloc, }; int ip_map_cache_create(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd; int err; cd = cache_create_net(&ip_map_cache_template, net); if (IS_ERR(cd)) return PTR_ERR(cd); err = cache_register_net(cd, net); if (err) { cache_destroy_net(cd, net); return err; } sn->ip_map_cache = cd; return 0; } void ip_map_cache_destroy(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd = sn->ip_map_cache; sn->ip_map_cache = NULL; cache_purge(cd); cache_unregister_net(cd, net); cache_destroy_net(cd, net); }
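/*
 * Editor's note: a minimal userspace sketch, not part of the kernel
 * sources above. It builds the on-the-wire RPC_AUTH_UNIX credential
 * body that svcauth_unix_accept() walks: stamp, machinename as an XDR
 * opaque, uid, gid, then the supplementary gid count and array, all as
 * big-endian 32-bit XDR words. The helper names (xdr_put_u32,
 * build_authsys_cred) are hypothetical, chosen for illustration; the
 * kernel-side decoder ignores the stamp and machinename fields and
 * accepts at most UNX_NGROUPS (16) supplementary gids.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t *xdr_put_u32(uint8_t *p, uint32_t val)
{
	uint32_t be = htonl(val);

	memcpy(p, &be, 4);		/* avoid unaligned stores */
	return p + 4;
}

static size_t build_authsys_cred(uint8_t *buf, const char *machine,
				 uint32_t uid, uint32_t gid,
				 const uint32_t *gids, uint32_t ngids)
{
	size_t mlen = strlen(machine);
	size_t pad = (4 - (mlen & 3)) & 3;	/* XDR pads opaques to 4 bytes */
	uint8_t *p = buf;
	uint32_t i;

	p = xdr_put_u32(p, 0);			/* stamp: ignored by the server */
	p = xdr_put_u32(p, (uint32_t)mlen);	/* machinename, also ignored */
	memcpy(p, machine, mlen);
	p += mlen;
	memset(p, 0, pad);
	p += pad;
	p = xdr_put_u32(p, uid);
	p = xdr_put_u32(p, gid);
	p = xdr_put_u32(p, ngids);
	for (i = 0; i < ngids; i++)
		p = xdr_put_u32(p, gids[i]);
	return p - buf;
}

int main(void)
{
	uint8_t cred[256];
	uint32_t gids[2] = { 100, 1000 };
	size_t len = build_authsys_cred(cred, "client.example", 1000, 100,
					gids, 2);

	printf("AUTH_UNIX credential body: %zu bytes\n", len);
	return 0;
}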
// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. */ #include <uapi/linux/magic.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/mount.h> #include <linux/parser.h> #include <linux/module.h> #include <linux/statfs.h> #include <linux/seq_file.h> #include <linux/posix_acl_xattr.h> #include <linux/exportfs.h> #include <linux/file.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include "overlayfs.h" #include "params.h" MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); struct ovl_dir_cache; static struct dentry *ovl_d_real(struct dentry *dentry, enum d_real_type type) { struct dentry *upper, *lower; int err; switch (type) { case D_REAL_DATA: case D_REAL_METADATA: break; default: goto bug; } if (!d_is_reg(dentry)) { /* d_real_inode() is only relevant for regular files */ return dentry; } upper = ovl_dentry_upper(dentry); if (upper && (type == D_REAL_METADATA || ovl_has_upperdata(d_inode(dentry)))) return upper; if (type == D_REAL_METADATA) { lower = ovl_dentry_lower(dentry); goto real_lower; } /* * Best effort lazy lookup of lowerdata for the D_REAL_DATA case to * return the real lowerdata dentry. The only current caller of d_real() * with D_REAL_DATA is d_real_inode() from trace_uprobe, and that call is * likely to be followed by reads from the file before uprobes are placed * at an offset within the file, so lowerdata should be available * when setting the uprobe. */ err = ovl_verify_lowerdata(dentry); if (err) goto bug; lower = ovl_dentry_lowerdata(dentry); if (!lower) goto bug; real_lower: /* Handle recursion into stacked lower fs */ return d_real(lower, type); bug: WARN(1, "%s(%pd4, %d): real dentry not found\n", __func__, dentry, type); return dentry; } static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) { int ret = 1; if (!d) return 1; if (weak) { if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) ret = d->d_op->d_weak_revalidate(d, flags); } else if (d->d_flags & DCACHE_OP_REVALIDATE) { struct dentry *parent; struct inode *dir; struct name_snapshot n; if (flags & LOOKUP_RCU) { parent = READ_ONCE(d->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; } else { parent = dget_parent(d); dir = d_inode(parent); } take_dentry_name_snapshot(&n, d); ret = d->d_op->d_revalidate(dir, &n.name, d, flags); release_dentry_name_snapshot(&n); if (!(flags & LOOKUP_RCU)) dput(parent); if (!ret) { if (!(flags & LOOKUP_RCU)) d_invalidate(d); ret = -ESTALE; } } return ret; } static int ovl_dentry_revalidate_common(struct dentry *dentry, unsigned int flags, bool weak) { struct ovl_entry *oe; struct ovl_path *lowerstack; struct inode *inode = d_inode_rcu(dentry); struct dentry *upper; unsigned int i; int ret = 1; if (!inode) { /* * Lookup of negative dentries will call ovl_dentry_init_flags() * with NULL upperdentry and NULL oe, resulting in the * DCACHE_OP*_REVALIDATE flags being cleared. Hence the only * way to get a negative inode here is a race with dentry * destruction.
*/ WARN_ON(!(flags & LOOKUP_RCU)); return -ECHILD; } oe = OVL_I_E(inode); lowerstack = ovl_lowerstack(oe); upper = ovl_i_dentry_upper(inode); if (upper) ret = ovl_revalidate_real(upper, flags, weak); for (i = 0; ret > 0 && i < ovl_numlower(oe); i++) ret = ovl_revalidate_real(lowerstack[i].dentry, flags, weak); return ret; } static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { return ovl_dentry_revalidate_common(dentry, flags, false); } static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) { return ovl_dentry_revalidate_common(dentry, flags, true); } static const struct dentry_operations ovl_dentry_operations = { .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, }; #if IS_ENABLED(CONFIG_UNICODE) static const struct dentry_operations ovl_dentry_ci_operations = { .d_real = ovl_d_real, .d_revalidate = ovl_dentry_revalidate, .d_weak_revalidate = ovl_dentry_weak_revalidate, .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, }; #endif static struct kmem_cache *ovl_inode_cachep; static struct inode *ovl_alloc_inode(struct super_block *sb) { struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL); if (!oi) return NULL; oi->cache = NULL; oi->redirect = NULL; oi->version = 0; oi->flags = 0; oi->__upperdentry = NULL; oi->lowerdata_redirect = NULL; oi->oe = NULL; mutex_init(&oi->lock); return &oi->vfs_inode; } static void ovl_free_inode(struct inode *inode) { struct ovl_inode *oi = OVL_I(inode); kfree(oi->redirect); kfree(oi->oe); mutex_destroy(&oi->lock); kmem_cache_free(ovl_inode_cachep, oi); } static void ovl_destroy_inode(struct inode *inode) { struct ovl_inode *oi = OVL_I(inode); dput(oi->__upperdentry); ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe)); if (S_ISDIR(inode->i_mode)) ovl_dir_cache_free(inode); else kfree(oi->lowerdata_redirect); } static void ovl_put_super(struct super_block *sb) { struct ovl_fs *ofs = OVL_FS(sb); if (ofs) ovl_free_fs(ofs); } /* Sync real dirty inodes in upper filesystem (if it exists) */ static int ovl_sync_fs(struct super_block *sb, int wait) { struct ovl_fs *ofs = OVL_FS(sb); struct super_block *upper_sb; int ret; ret = ovl_sync_status(ofs); if (ret < 0) return -EIO; if (!ret) return ret; /* * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). * All the super blocks will be iterated, including upper_sb. * * If this is a syncfs(2) call, then we do need to call * sync_filesystem() on upper_sb, but it is enough to do so when * called with wait == 1. */ if (!wait) return 0; upper_sb = ovl_upper_mnt(ofs)->mnt_sb; down_read(&upper_sb->s_umount); ret = sync_filesystem(upper_sb); up_read(&upper_sb->s_umount); return ret; } /** * ovl_statfs * @dentry: The dentry to query * @buf: The struct kstatfs to fill in with stats * * Get the filesystem statistics. As writes always target the upper layer * filesystem, pass the statfs to the upper filesystem (if it exists) */ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct ovl_fs *ofs = OVL_FS(sb); struct dentry *root_dentry = sb->s_root; struct path path; int err; ovl_path_real(root_dentry, &path); err = vfs_statfs(&path, buf); if (!err) { buf->f_namelen = ofs->namelen; buf->f_type = OVERLAYFS_SUPER_MAGIC; if (ovl_has_fsid(ofs)) buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); } return err; } static const struct super_operations ovl_super_operations = { .alloc_inode = ovl_alloc_inode, .free_inode = ovl_free_inode, .destroy_inode = ovl_destroy_inode, .drop_inode = inode_just_drop, .put_super = ovl_put_super, .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, }; #define OVL_WORKDIR_NAME "work" #define OVL_INDEXDIR_NAME "index" static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, const char *name, bool persist) { struct inode *dir = ofs->workbasedir->d_inode; struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *work; int err; bool retried = false; retry: work = ovl_start_creating_upper(ofs, ofs->workbasedir, &QSTR(name)); if (!IS_ERR(work)) { struct iattr attr = { .ia_valid = ATTR_MODE, .ia_mode = S_IFDIR | 0, }; if (work->d_inode) { end_creating_keep(work); if (persist) return work; err = -EEXIST; if (retried) goto out_dput; retried = true; err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0); dput(work); if (err == -EINVAL) return ERR_PTR(err); goto retry; } work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); end_creating_keep(work); err = PTR_ERR(work); if (IS_ERR(work)) goto out_err; /* Weird filesystem returning with hashed negative (kernfs)? */ err = -EINVAL; if (d_really_is_negative(work)) goto out_dput; /* * Try to remove POSIX ACL xattrs from workdir. We are good if: * * a) success (there was a POSIX ACL xattr and it was removed) * b) -ENODATA (there was no POSIX ACL xattr) * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) * * There are various other error values that could effectively * mean that the xattr doesn't exist (e.g. -ERANGE is returned * if the xattr name is too long), but the set of filesystems * allowed as upper are limited to "normal" ones, where checking * for the above two errors is sufficient.
*/ err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS); if (err && err != -ENODATA && err != -EOPNOTSUPP) goto out_dput; /* Clear any inherited mode bits */ inode_lock(work->d_inode); err = ovl_do_notify_change(ofs, work, &attr); inode_unlock(work->d_inode); if (err) goto out_dput; } else { err = PTR_ERR(work); goto out_err; } return work; out_dput: dput(work); out_err: pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", ofs->config.workdir, name, -err); return NULL; } static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs, const char *name) { struct kstatfs statfs; int err = vfs_statfs(path, &statfs); if (err) pr_err("statfs failed on '%s'\n", name); else ofs->namelen = max(ofs->namelen, statfs.f_namelen); return err; } static int ovl_lower_dir(const char *name, const struct path *path, struct ovl_fs *ofs, int *stack_depth) { int fh_type; int err; err = ovl_check_namelen(path, ofs, name); if (err) return err; *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); /* * The inodes index feature and NFS export need to encode and decode * file handles, so they require that all layers support them. */ fh_type = ovl_can_decode_fh(path->dentry->d_sb); if ((ofs->config.nfs_export || (ofs->config.index && ofs->config.upperdir)) && !fh_type) { ofs->config.index = false; ofs->config.nfs_export = false; pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", name); } ofs->nofh |= !fh_type; /* * Decoding origin file handle is required for persistent st_ino. * Without persistent st_ino, xino=auto falls back to xino=off. */ if (ofs->config.xino == OVL_XINO_AUTO && ofs->config.upperdir && !fh_type) { ofs->config.xino = OVL_XINO_OFF; pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n", name); } /* Check if lower fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) ofs->xino_mode = -1; return 0; } /* Workdir should not be subdir of upperdir and vice versa */ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) { bool ok = false; if (workdir != upperdir) { struct dentry *trap = lock_rename(workdir, upperdir); if (!IS_ERR(trap)) unlock_rename(workdir, upperdir); ok = (trap == NULL); } return ok; } static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, struct inode **ptrap, const char *name) { struct inode *trap; int err; trap = ovl_get_trap_inode(sb, dir); err = PTR_ERR_OR_ZERO(trap); if (err) { if (err == -ELOOP) pr_err("conflicting %s path\n", name); return err; } *ptrap = trap; return 0; } /* * Determine how we treat concurrent use of upperdir/workdir based on the * index feature. This is papering over mount leaks of container runtimes, * for example, an old overlay mount is leaked and now its upperdir is * attempted to be used as a lower layer in a new overlay mount. 
*/ static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) { if (ofs->config.index) { pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", name); return -EBUSY; } else { pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", name); return 0; } } static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, struct ovl_layer *upper_layer, const struct path *upperpath) { struct vfsmount *upper_mnt; int err; /* Upperdir path should not be r/o */ if (__mnt_is_readonly(upperpath->mnt)) { pr_err("upper fs is r/o, try multi-lower layers mount\n"); err = -EINVAL; goto out; } err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); if (err) goto out; err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap, "upperdir"); if (err) goto out; upper_mnt = clone_private_mount(upperpath); err = PTR_ERR(upper_mnt); if (IS_ERR(upper_mnt)) { pr_err("failed to clone upperpath\n"); goto out; } /* Don't inherit atime flags */ upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); upper_layer->mnt = upper_mnt; upper_layer->idx = 0; upper_layer->fsid = 0; /* * Inherit SB_NOSEC flag from upperdir. * * This optimization changes behavior when a security related attribute * (suid/sgid/security.*) is changed on an underlying layer. This is * okay because we don't yet have guarantees in that case, but it will * need careful treatment once we want to honour changes to underlying * filesystems. */ if (upper_mnt->mnt_sb->s_flags & SB_NOSEC) sb->s_flags |= SB_NOSEC; if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) { ofs->upperdir_locked = true; } else { err = ovl_report_in_use(ofs, "upperdir"); if (err) goto out; } err = 0; out: return err; } /* * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and * negative values if error is encountered. 
*/ static int ovl_check_rename_whiteout(struct ovl_fs *ofs) { struct dentry *workdir = ofs->workdir; struct dentry *temp; struct dentry *whiteout; struct name_snapshot name; struct renamedata rd = {}; char name2[OVL_TEMPNAME_SIZE]; int err; temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0)); err = PTR_ERR(temp); if (IS_ERR(temp)) return err; rd.mnt_idmap = ovl_upper_mnt_idmap(ofs); rd.old_parent = workdir; rd.new_parent = workdir; rd.flags = RENAME_WHITEOUT; ovl_tempname(name2); err = start_renaming_dentry(&rd, 0, temp, &QSTR(name2)); if (err) { dput(temp); return err; } /* Name is inline and stable - using snapshot as a copy helper */ take_dentry_name_snapshot(&name, temp); err = ovl_do_rename_rd(&rd); end_renaming(&rd); if (err) { if (err == -EINVAL) err = 0; goto cleanup_temp; } whiteout = ovl_lookup_upper_unlocked(ofs, name.name.name, workdir, name.name.len); err = PTR_ERR(whiteout); if (IS_ERR(whiteout)) goto cleanup_temp; err = ovl_upper_is_whiteout(ofs, whiteout); /* Best effort cleanup of whiteout and temp file */ if (err) ovl_cleanup(ofs, workdir, whiteout); dput(whiteout); cleanup_temp: ovl_cleanup(ofs, workdir, temp); release_dentry_name_snapshot(&name); dput(temp); return err; } static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, struct dentry *parent, const char *name, umode_t mode) { struct dentry *child; child = ovl_start_creating_upper(ofs, parent, &QSTR(name)); if (!IS_ERR(child)) { if (!child->d_inode) child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode)); end_creating_keep(child); } dput(parent); return child; } /* * Creates $workdir/work/incompat/volatile/dirty file if it is not already * present. */ static int ovl_create_volatile_dirty(struct ovl_fs *ofs) { unsigned int ctr; struct dentry *d = dget(ofs->workbasedir); static const char *const volatile_path[] = { OVL_WORKDIR_NAME, "incompat", "volatile", "dirty" }; const char *const *name = volatile_path; for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) { d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG); if (IS_ERR(d)) return PTR_ERR(d); } dput(d); return 0; } static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, const struct path *workpath) { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *workdir; struct file *tmpfile; bool rename_whiteout; bool d_type; int fh_type; int err; err = mnt_want_write(mnt); if (err) return err; workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); err = PTR_ERR(workdir); if (IS_ERR_OR_NULL(workdir)) goto out; ofs->workdir = workdir; err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); if (err) goto out; /* * Upper should support d_type, else whiteouts are visible. Given * workdir and upper are on same fs, we can do iterate_dir() on * workdir. This check requires successful creation of workdir in * previous step. 
*/ err = ovl_check_d_type_supported(workpath); if (err < 0) goto out; d_type = err; if (!d_type) pr_warn("upper fs needs to support d_type.\n"); /* Check if upper/work fs supports O_TMPFILE */ tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0); ofs->tmpfile = !IS_ERR(tmpfile); if (ofs->tmpfile) fput(tmpfile); else pr_warn("upper fs does not support tmpfile.\n"); /* Check if upper/work fs supports RENAME_WHITEOUT */ err = ovl_check_rename_whiteout(ofs); if (err < 0) goto out; rename_whiteout = err; if (!rename_whiteout) pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); /* * Check if upper/work fs supports (trusted|user).overlay.* xattr */ err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1); if (err) { pr_warn("failed to set xattr on upper\n"); ofs->noxattr = true; if (ovl_redirect_follow(ofs)) { ofs->config.redirect_mode = OVL_REDIRECT_NOFOLLOW; pr_warn("...falling back to redirect_dir=nofollow.\n"); } if (ofs->config.metacopy) { ofs->config.metacopy = false; pr_warn("...falling back to metacopy=off.\n"); } if (ofs->config.index) { ofs->config.index = false; pr_warn("...falling back to index=off.\n"); } if (ovl_has_fsid(ofs)) { ofs->config.uuid = OVL_UUID_NULL; pr_warn("...falling back to uuid=null.\n"); } /* * xattr support is required for persistent st_ino. * Without persistent st_ino, xino=auto falls back to xino=off. */ if (ofs->config.xino == OVL_XINO_AUTO) { ofs->config.xino = OVL_XINO_OFF; pr_warn("...falling back to xino=off.\n"); } if (err == -EPERM && !ofs->config.userxattr) pr_info("try mounting with 'userxattr' option\n"); err = 0; } else { ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE); } /* * We allowed sub-optimal upper fs configuration and don't want to break * users over kernel upgrade, but we never allowed remote upper fs, so * we can enforce strict requirements for remote upper fs. */ if (ovl_dentry_remote(ofs->workdir) && (!d_type || !rename_whiteout || ofs->noxattr)) { pr_err("upper fs missing required features.\n"); err = -EINVAL; goto out; } /* * For volatile mount, create an incompat/volatile/dirty file to keep * track of it.
*/ if (ofs->config.ovl_volatile) { err = ovl_create_volatile_dirty(ofs); if (err < 0) { pr_err("Failed to create volatile/dirty file.\n"); goto out; } } /* Check if upper/work fs supports file handles */ fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); if (ofs->config.index && !fh_type) { ofs->config.index = false; pr_warn("upper fs does not support file handles, falling back to index=off.\n"); } ofs->nofh |= !fh_type; /* Check if upper fs has 32bit inode numbers */ if (fh_type != FILEID_INO32_GEN) ofs->xino_mode = -1; /* NFS export of r/w mount depends on index */ if (ofs->config.nfs_export && !ofs->config.index) { pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } out: mnt_drop_write(mnt); return err; } static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath, const struct path *workpath) { int err; err = -EINVAL; if (upperpath->mnt != workpath->mnt) { pr_err("workdir and upperdir must reside under the same mount\n"); return err; } if (!ovl_workdir_ok(workpath->dentry, upperpath->dentry)) { pr_err("workdir and upperdir must be separate subtrees\n"); return err; } ofs->workbasedir = dget(workpath->dentry); if (ovl_inuse_trylock(ofs->workbasedir)) { ofs->workdir_locked = true; } else { err = ovl_report_in_use(ofs, "workdir"); if (err) return err; } err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, "workdir"); if (err) return err; return ovl_make_workdir(sb, ofs, workpath); } static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, struct ovl_entry *oe, const struct path *upperpath) { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir; struct dentry *origin = ovl_lowerstack(oe)->dentry; const struct ovl_fh *fh; int err; fh = ovl_get_origin_fh(ofs, origin); if (IS_ERR(fh)) return PTR_ERR(fh); err = mnt_want_write(mnt); if (err) goto out_free_fh; /* Verify lower root is upper root origin */ err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; } /* index dir will act also as workdir */ iput(ofs->workdir_trap); ofs->workdir_trap = NULL; dput(ofs->workdir); ofs->workdir = NULL; indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (IS_ERR(indexdir)) { err = PTR_ERR(indexdir); } else if (indexdir) { ofs->workdir = indexdir; err = ovl_setup_trap(sb, indexdir, &ofs->workdir_trap, "indexdir"); if (err) goto out; /* * Verify upper root is exclusively associated with index dir. * Older kernels stored upper fh in ".overlay.origin" * xattr. If that xattr exists, verify that it is a match to * upper dir file handle. In any case, verify or set xattr * ".overlay.upper" to indicate that index may have * directory entries. 
*/ if (ovl_check_origin_xattr(ofs, indexdir)) { err = ovl_verify_origin_xattr(ofs, indexdir, OVL_XATTR_ORIGIN, upperpath->dentry, true, false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } err = ovl_verify_upper(ofs, indexdir, upperpath->dentry, true); if (err) pr_err("failed to verify index dir 'upper' xattr\n"); /* Cleanup bad/stale/orphan index entries */ if (!err) err = ovl_indexdir_cleanup(ofs); } if (err || !indexdir) pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); out: mnt_drop_write(mnt); out_free_fh: kfree(fh); return err; } static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) { unsigned int i; if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) return true; /* * We allow using single lower with null uuid for index and nfs_export * for example to support those features with single lower squashfs. * To avoid regressions in setups of overlay with re-formatted lower * squashfs, do not allow decoding origin with lower null uuid unless * user opted-in to one of the new features that require following the * lower inode of non-dir upper. */ if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) return false; for (i = 0; i < ofs->numfs; i++) { /* * We use uuid to associate an overlay lower file handle with a * lower layer, so we can accept lower fs with null uuid as long * as all lower layers with null uuid are on the same fs. * if we detect multiple lower fs with the same uuid, we * disable lower file handle decoding on all of them. */ if (ofs->fs[i].is_lower && uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { ofs->fs[i].bad_uuid = true; return false; } } return true; } /* Get a unique fsid for the layer */ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) { struct super_block *sb = path->mnt->mnt_sb; unsigned int i; dev_t dev; int err; bool bad_uuid = false; bool warn = false; for (i = 0; i < ofs->numfs; i++) { if (ofs->fs[i].sb == sb) return i; } if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { bad_uuid = true; if (ofs->config.xino == OVL_XINO_AUTO) { ofs->config.xino = OVL_XINO_OFF; warn = true; } if (ofs->config.index || ofs->config.nfs_export) { ofs->config.index = false; ofs->config.nfs_export = false; warn = true; } if (warn) { pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n", uuid_is_null(&sb->s_uuid) ? "null" : "conflicting", path->dentry, ovl_xino_mode(&ofs->config)); } } err = get_anon_bdev(&dev); if (err) { pr_err("failed to get anonymous bdev for lowerpath\n"); return err; } ofs->fs[ofs->numfs].sb = sb; ofs->fs[ofs->numfs].pseudo_dev = dev; ofs->fs[ofs->numfs].bad_uuid = bad_uuid; return ofs->numfs++; } /* * The fsid after the last lower fsid is used for the data layers. * It is a "null fs" with a null sb, null uuid, and no pseudo dev. 
*/ static int ovl_get_data_fsid(struct ovl_fs *ofs) { return ofs->numfs; } /* * Set the ovl sb encoding as the same one used by the first layer */ static int ovl_set_encoding(struct super_block *sb, struct super_block *fs_sb) { if (!sb_has_encoding(fs_sb)) return 0; #if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(fs_sb)) { pr_err("strict encoding not supported\n"); return -EINVAL; } sb->s_encoding = fs_sb->s_encoding; sb->s_encoding_flags = fs_sb->s_encoding_flags; #endif return 0; } static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, struct ovl_fs_context *ctx, struct ovl_layer *layers) { int err; unsigned int i; size_t nr_merged_lower; ofs->fs = kcalloc(ctx->nr + 2, sizeof(struct ovl_sb), GFP_KERNEL); if (ofs->fs == NULL) return -ENOMEM; /* * idx/fsid 0 are reserved for upper fs even with lower only overlay * and the last fsid is reserved for "null fs" of the data layers. */ ofs->numfs++; /* * All lower layers that share the same fs as upper layer, use the same * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower * only overlay to simplify ovl_fs_free(). * is_lower will be set if upper fs is shared with a lower layer. */ err = get_anon_bdev(&ofs->fs[0].pseudo_dev); if (err) { pr_err("failed to get anonymous bdev for upper fs\n"); return err; } if (ovl_upper_mnt(ofs)) { ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; ofs->fs[0].is_lower = false; if (ofs->casefold) { err = ovl_set_encoding(sb, ofs->fs[0].sb); if (err) return err; } } nr_merged_lower = ctx->nr - ctx->nr_data; for (i = 0; i < ctx->nr; i++) { struct ovl_fs_context_layer *l = &ctx->lower[i]; struct vfsmount *mnt; struct inode *trap; int fsid; if (i < nr_merged_lower) fsid = ovl_get_fsid(ofs, &l->path); else fsid = ovl_get_data_fsid(ofs); if (fsid < 0) return fsid; /* * Check if lower root conflicts with this overlay layers before * checking if it is in-use as upperdir/workdir of "another" * mount, because we do not bother to check in ovl_is_inuse() if * the upperdir/workdir is in fact in-use by our * upperdir/workdir. */ err = ovl_setup_trap(sb, l->path.dentry, &trap, "lowerdir"); if (err) return err; if (ovl_is_inuse(l->path.dentry)) { err = ovl_report_in_use(ofs, "lowerdir"); if (err) { iput(trap); return err; } } mnt = clone_private_mount(&l->path); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { pr_err("failed to clone lowerpath\n"); iput(trap); return err; } /* * Make lower layers R/O. That way fchmod/fchown on lower file * will fail instead of modifying lower fs. */ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; layers[ofs->numlayer].trap = trap; layers[ofs->numlayer].mnt = mnt; layers[ofs->numlayer].idx = ofs->numlayer; layers[ofs->numlayer].fsid = fsid; layers[ofs->numlayer].fs = &ofs->fs[fsid]; /* Store for printing lowerdir=... in ovl_show_options() */ ofs->config.lowerdirs[ofs->numlayer] = l->name; l->name = NULL; ofs->numlayer++; ofs->fs[fsid].is_lower = true; if (ofs->casefold) { if (!ovl_upper_mnt(ofs) && !sb_has_encoding(sb)) { err = ovl_set_encoding(sb, ofs->fs[fsid].sb); if (err) return err; } if (!sb_same_encoding(sb, mnt->mnt_sb)) { pr_err("all layers must have the same encoding\n"); return -EINVAL; } } } /* * When all layers on same fs, overlay can use real inode numbers. * With mount option "xino=<on|auto>", mounter declares that there are * enough free high bits in underlying fs to hold the unique fsid. 
* If overlayfs does encounter underlying inodes using the high xino * bits reserved for fsid, it emits a warning and uses the original * inode number or a non persistent inode number allocated from a * dedicated range. */ if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) { if (ofs->config.xino == OVL_XINO_ON) pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); ofs->xino_mode = 0; } else if (ofs->config.xino == OVL_XINO_OFF) { ofs->xino_mode = -1; } else if (ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for encoding * fsid, where fsid 0 is reserved for upper fs (even with * lower only overlay) +1 extra bit is reserved for the non * persistent inode number range that is used for resolving * xino lower bits overflow. */ BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; } if (ofs->xino_mode > 0) { pr_info("\"xino\" feature enabled using %d upper inode bits.\n", ofs->xino_mode); } return 0; } static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, struct ovl_fs_context *ctx, struct ovl_fs *ofs, struct ovl_layer *layers) { int err; unsigned int i; size_t nr_merged_lower; struct ovl_entry *oe; struct ovl_path *lowerstack; struct ovl_fs_context_layer *l; if (!ofs->config.upperdir && ctx->nr == 1) { pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); return ERR_PTR(-EINVAL); } if (ctx->nr == ctx->nr_data) { pr_err("at least one non-data lowerdir is required\n"); return ERR_PTR(-EINVAL); } err = -EINVAL; for (i = 0; i < ctx->nr; i++) { l = &ctx->lower[i]; err = ovl_lower_dir(l->name, &l->path, ofs, &sb->s_stack_depth); if (err) return ERR_PTR(err); } err = -EINVAL; sb->s_stack_depth++; if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { pr_err("maximum fs stacking depth exceeded\n"); return ERR_PTR(err); } err = ovl_get_layers(sb, ofs, ctx, layers); if (err) return ERR_PTR(err); err = -ENOMEM; /* Data-only layers are not merged in root directory */ nr_merged_lower = ctx->nr - ctx->nr_data; oe = ovl_alloc_entry(nr_merged_lower); if (!oe) return ERR_PTR(err); lowerstack = ovl_lowerstack(oe); for (i = 0; i < nr_merged_lower; i++) { l = &ctx->lower[i]; lowerstack[i].dentry = dget(l->path.dentry); lowerstack[i].layer = &ofs->layers[i + 1]; } ofs->numdatalayer = ctx->nr_data; return oe; } /* * Check if this layer root is a descendant of: * - another layer of this overlayfs instance * - upper/work dir of any overlayfs instance */ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, struct dentry *dentry, const char *name, bool is_lower) { struct dentry *next = dentry, *parent; int err = 0; if (!dentry) return 0; parent = dget_parent(next); /* Walk back ancestors to root (inclusive) looking for traps */ while (!err && parent != next) { if (is_lower && ovl_lookup_trap_inode(sb, parent)) { err = -ELOOP; pr_err("overlapping %s path\n", name); } else if (ovl_is_inuse(parent)) { err = ovl_report_in_use(ofs, name); } next = parent; parent = dget_parent(next); dput(next); } dput(parent); return err; } /* * Check if any of the layers or work dirs overlap. */ static int ovl_check_overlapping_layers(struct super_block *sb, struct ovl_fs *ofs) { int i, err; if (ovl_upper_mnt(ofs)) { err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, "upperdir", false); if (err) return err; /* * Checking workbasedir avoids hitting ovl_is_inuse(parent) of * this instance and covers overlapping work and index dirs, * unless work or index dir have been moved since created inside * workbasedir. 
In that case, we already have their traps in * inode cache and we will catch that case on lookup. */ err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir", false); if (err) return err; } for (i = 1; i < ofs->numlayer; i++) { err = ovl_check_layer(sb, ofs, ofs->layers[i].mnt->mnt_root, "lowerdir", true); if (err) return err; } return 0; } static struct dentry *ovl_get_root(struct super_block *sb, struct dentry *upperdentry, struct ovl_entry *oe) { struct dentry *root; struct ovl_fs *ofs = OVL_FS(sb); struct ovl_path *lowerpath = ovl_lowerstack(oe); unsigned long ino = d_inode(lowerpath->dentry)->i_ino; int fsid = lowerpath->layer->fsid; struct ovl_inode_params oip = { .upperdentry = upperdentry, .oe = oe, }; root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); if (!root) return NULL; if (upperdentry) { /* Root inode uses upper st_ino/i_ino */ ino = d_inode(upperdentry)->i_ino; fsid = 0; ovl_dentry_set_upper_alias(root); if (ovl_is_impuredir(sb, upperdentry)) ovl_set_flag(OVL_IMPURE, d_inode(root)); } /* Look for xwhiteouts marker except in the lowermost layer */ for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) { struct path path = { .mnt = lowerpath->layer->mnt, .dentry = lowerpath->dentry, }; /* overlay.opaque=x means xwhiteouts directory */ if (ovl_get_opaquedir_val(ofs, &path) == 'x') { ovl_layer_set_xwhiteouts(ofs, lowerpath->layer); ovl_dentry_set_xwhiteouts(root); } } /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); ovl_dentry_set_flag(OVL_E_CONNECTED, root); ovl_set_upperdata(d_inode(root)); ovl_inode_init(d_inode(root), &oip, ino, fsid); WARN_ON(!!IS_CASEFOLDED(d_inode(root)) != ofs->casefold); ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE); /* root keeps a reference of upperdentry */ dget(upperdentry); return root; } static void ovl_set_d_op(struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) struct ovl_fs *ofs = sb->s_fs_info; if (ofs->casefold) { set_default_d_op(sb, &ovl_dentry_ci_operations); return; } #endif set_default_d_op(sb, &ovl_dentry_operations); } static int ovl_fill_super_creds(struct fs_context *fc, struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; struct cred *creator_cred = (struct cred *)ofs->creator_cred; struct ovl_fs_context *ctx = fc->fs_private; struct ovl_layer *layers; struct ovl_entry *oe = NULL; int err; err = ovl_fs_params_verify(ctx, &ofs->config); if (err) return err; err = -EINVAL; if (ctx->nr == 0) { if (!(fc->sb_flags & SB_SILENT)) pr_err("missing 'lowerdir'\n"); return err; } err = -ENOMEM; layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL); if (!layers) return err; ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL); if (!ofs->config.lowerdirs) { kfree(layers); return err; } ofs->layers = layers; /* * Layer 0 is reserved for upper even if there's no upper. * config.lowerdirs[0] is used for storing the user provided colon * separated lowerdir string. 
*/ ofs->config.lowerdirs[0] = ctx->lowerdir_all; ctx->lowerdir_all = NULL; ofs->numlayer = 1; sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; atomic_long_set(&ofs->last_ino, 1); /* Assume underlying fs uses 32bit inodes unless proven otherwise */ if (ofs->config.xino != OVL_XINO_OFF) { ofs->xino_mode = BITS_PER_LONG - 32; if (!ofs->xino_mode) { pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); ofs->config.xino = OVL_XINO_OFF; } } /* alloc/destroy_inode needed for setting up traps in inode cache */ sb->s_op = &ovl_super_operations; if (ofs->config.upperdir) { struct super_block *upper_sb; err = -EINVAL; if (!ofs->config.workdir) { pr_err("missing 'workdir'\n"); return err; } err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper); if (err) return err; upper_sb = ovl_upper_mnt(ofs)->mnt_sb; if (!ovl_should_sync(ofs)) { ofs->errseq = errseq_sample(&upper_sb->s_wb_err); if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { err = -EIO; pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n"); return err; } } err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work); if (err) return err; if (!ofs->workdir) sb->s_flags |= SB_RDONLY; sb->s_stack_depth = upper_sb->s_stack_depth; sb->s_time_gran = upper_sb->s_time_gran; } oe = ovl_get_lowerstack(sb, ctx, ofs, layers); err = PTR_ERR(oe); if (IS_ERR(oe)) return err; /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ovl_upper_mnt(ofs)) sb->s_flags |= SB_RDONLY; if (!ovl_origin_uuid(ofs) && ofs->numfs > 1) { pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=null.\n"); ofs->config.uuid = OVL_UUID_NULL; } else if (ovl_has_fsid(ofs) && ovl_upper_mnt(ofs)) { /* Use per instance persistent uuid/fsid */ ovl_init_uuid_xattr(sb, ofs, &ctx->upper); } if (!ovl_force_readonly(ofs) && ofs->config.index) { err = ovl_get_indexdir(sb, ofs, oe, &ctx->upper); if (err) goto out_free_oe; /* Force r/o mount with no index dir */ if (!ofs->workdir) sb->s_flags |= SB_RDONLY; } err = ovl_check_overlapping_layers(sb, ofs); if (err) goto out_free_oe; /* Show index=off in /proc/mounts for forced r/o mount */ if (!ofs->workdir) { ofs->config.index = false; if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } } if (ofs->config.metacopy && ofs->config.nfs_export) { pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); ofs->config.nfs_export = false; } /* * Support encoding decodable file handles with nfs_export=on * and encoding non-decodable file handles with nfs_export=off * if all layers support file handles. */ if (ofs->config.nfs_export) sb->s_export_op = &ovl_export_operations; else if (!ofs->nofh) sb->s_export_op = &ovl_export_fid_operations; /* Never override disk quota limits or use reserved space */ cap_lower(creator_cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_xattr = ovl_xattr_handlers(ofs); sb->s_fs_info = ofs; #ifdef CONFIG_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif sb->s_iflags |= SB_I_SKIP_SYNC; /* * Ensure that umask handling is done by the filesystems used * for the upper layer instead of overlayfs as that would * lead to unexpected results.
*/ sb->s_iflags |= SB_I_NOUMASK; sb->s_iflags |= SB_I_EVM_HMAC_UNSUPPORTED; err = -ENOMEM; sb->s_root = ovl_get_root(sb, ctx->upper.dentry, oe); if (!sb->s_root) goto out_free_oe; return 0; out_free_oe: ovl_free_entry(oe); return err; } int ovl_fill_super(struct super_block *sb, struct fs_context *fc) { struct ovl_fs *ofs = sb->s_fs_info; int err; err = -EIO; if (WARN_ON(fc->user_ns != current_user_ns())) goto out_err; ovl_set_d_op(sb); if (!ofs->creator_cred) { err = -ENOMEM; ofs->creator_cred = prepare_creds(); if (!ofs->creator_cred) goto out_err; } with_ovl_creds(sb) err = ovl_fill_super_creds(fc, sb); out_err: if (err) { ovl_free_fs(ofs); sb->s_fs_info = NULL; } return err; } struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, .name = "overlay", .init_fs_context = ovl_init_fs_context, .parameters = ovl_parameter_spec, .fs_flags = FS_USERNS_MOUNT, .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("overlay"); static void ovl_inode_init_once(void *foo) { struct ovl_inode *oi = foo; inode_init_once(&oi->vfs_inode); } static int __init ovl_init(void) { int err; ovl_inode_cachep = kmem_cache_create("ovl_inode", sizeof(struct ovl_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), ovl_inode_init_once); if (ovl_inode_cachep == NULL) return -ENOMEM; err = register_filesystem(&ovl_fs_type); if (!err) return 0; kmem_cache_destroy(ovl_inode_cachep); return err; } static void __exit ovl_exit(void) { unregister_filesystem(&ovl_fs_type); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(ovl_inode_cachep); } module_init(ovl_init); module_exit(ovl_exit);
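/*
 * Editor's note: a small userspace sketch, not part of the kernel
 * sources above, mirroring the xino arithmetic that ovl_get_layers()
 * sets up. With numfs distinct layer filesystems (fsid 0 reserved for
 * the upper fs), the kernel reserves ilog2(numfs - 1) + 2 high inode
 * bits: enough to encode every fsid plus one extra bit for the
 * non-persistent inode range used when the low bits overflow.
 * xino_make() is a hypothetical illustration of packing an fsid into
 * the reserved high bits, not the kernel's actual encoding function.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int xino_bits(unsigned int numfs)
{
	unsigned int bits = 0;

	/* open-coded ilog2(numfs - 1); only meaningful for numfs > 1 */
	while ((1u << (bits + 1)) <= numfs - 1)
		bits++;
	return bits + 2;
}

static uint64_t xino_make(unsigned int fsid, uint64_t real_ino,
			  unsigned int bits)
{
	/* fsid in the reserved high bits, the real inode number below */
	return ((uint64_t)fsid << (64 - bits)) | real_ino;
}

int main(void)
{
	unsigned int numfs = 3;	/* e.g. upper fs plus two lower fs */
	unsigned int bits = xino_bits(numfs);

	printf("%u layer filesystems -> %u high xino bits\n", numfs, bits);
	printf("fsid 2, ino 42 -> xino %#llx\n",
	       (unsigned long long)xino_make(2, 42, bits));
	return 0;
}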
/* SPDX-License-Identifier: GPL-2.0 */ /* Multipath TCP * * Copyright (c) 2017 - 2019, Intel Corporation.
*/ #ifndef __MPTCP_PROTOCOL_H #define __MPTCP_PROTOCOL_H #include <linux/random.h> #include <net/tcp.h> #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> #include <net/genetlink.h> #include <net/rstreason.h> #define MPTCP_SUPPORTED_VERSION 1 /* MPTCP option bits */ #define OPTION_MPTCP_MPC_SYN BIT(0) #define OPTION_MPTCP_MPC_SYNACK BIT(1) #define OPTION_MPTCP_MPC_ACK BIT(2) #define OPTION_MPTCP_MPJ_SYN BIT(3) #define OPTION_MPTCP_MPJ_SYNACK BIT(4) #define OPTION_MPTCP_MPJ_ACK BIT(5) #define OPTION_MPTCP_ADD_ADDR BIT(6) #define OPTION_MPTCP_RM_ADDR BIT(7) #define OPTION_MPTCP_FASTCLOSE BIT(8) #define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_RST BIT(10) #define OPTION_MPTCP_DSS BIT(11) #define OPTION_MPTCP_FAIL BIT(12) #define OPTION_MPTCP_CSUMREQD BIT(13) #define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \ OPTION_MPTCP_MPC_ACK) #define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \ OPTION_MPTCP_MPJ_ACK) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 #define MPTCPOPT_MP_JOIN 1 #define MPTCPOPT_DSS 2 #define MPTCPOPT_ADD_ADDR 3 #define MPTCPOPT_RM_ADDR 4 #define MPTCPOPT_MP_PRIO 5 #define MPTCPOPT_MP_FAIL 6 #define MPTCPOPT_MP_FASTCLOSE 7 #define MPTCPOPT_RST 8 /* MPTCP suboption lengths */ #define TCPOLEN_MPTCP_MPC_SYN 4 #define TCPOLEN_MPTCP_MPC_SYNACK 12 #define TCPOLEN_MPTCP_MPC_ACK 20 #define TCPOLEN_MPTCP_MPC_ACK_DATA 22 #define TCPOLEN_MPTCP_MPJ_SYN 12 #define TCPOLEN_MPTCP_MPJ_SYNACK 16 #define TCPOLEN_MPTCP_MPJ_ACK 24 #define TCPOLEN_MPTCP_DSS_BASE 4 #define TCPOLEN_MPTCP_DSS_ACK32 4 #define TCPOLEN_MPTCP_DSS_ACK64 8 #define TCPOLEN_MPTCP_DSS_MAP32 10 #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 #define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 #define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 #define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 #define TCPOLEN_MPTCP_PORT_LEN 2 #define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 3 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 #define TCPOLEN_MPTCP_FAIL 12 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_THMAC_LEN 8 /* MPTCP MP_CAPABLE flags */ #define MPTCP_VERSION_MASK (0x0F) #define MPTCP_CAP_CHECKSUM_REQD BIT(7) #define MPTCP_CAP_EXTENSIBILITY BIT(6) #define MPTCP_CAP_DENY_JOIN_ID0 BIT(5) #define MPTCP_CAP_HMAC_SHA256 BIT(0) #define MPTCP_CAP_FLAG_MASK (0x1F) /* MPTCP DSS flags */ #define MPTCP_DSS_DATA_FIN BIT(4) #define MPTCP_DSS_DSN64 BIT(3) #define MPTCP_DSS_HAS_MAP BIT(2) #define MPTCP_DSS_ACK64 BIT(1) #define MPTCP_DSS_HAS_ACK BIT(0) #define MPTCP_DSS_FLAG_MASK (0x1F) /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) /* MPTCP MP_PRIO flags */ #define MPTCP_PRIO_BKUP BIT(0) /* MPTCP TCPRST flags */ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ #define MPTCP_WORK_RTX 1 #define MPTCP_FALLBACK_DONE 2 #define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 #define MPTCP_CLEAN_UNA 2 #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { u64 map_seq; u64 end_seq; u32 
offset; u8 has_rxtstamp; u8 cant_coalesce; }; #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0])) static inline bool before64(__u64 seq1, __u64 seq2) { return (__s64)(seq1 - seq2) < 0; } #define after64(seq2, seq1) before64(seq1, seq2) struct mptcp_options_received { u64 sndr_key; u64 rcvr_key; u64 data_ack; u64 data_seq; u32 subflow_seq; u16 data_len; __sum16 csum; struct_group(status, u16 suboptions; u16 use_map:1, dsn64:1, data_fin:1, use_ack:1, ack64:1, mpc_map:1, reset_reason:4, reset_transient:1, echo:1, backup:1, deny_join_id0:1, __unused:2; ); u8 join_id; u32 token; u32 nonce; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; u64 ahmac; u64 fail_seq; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | ((nib & 0xF) << 8) | field); } enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, MPTCP_PM_SUBFLOW_ESTABLISHED, MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is * accounted in id_avail_bitmap */ }; enum mptcp_pm_type { MPTCP_PM_TYPE_KERNEL = 0, MPTCP_PM_TYPE_USERSPACE, __MPTCP_PM_TYPE_NR, __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, }; /* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ #define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1) enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL, }; /* max value of mptcp_addr_info.id */ #define MPTCP_PM_MAX_ADDR_ID U8_MAX struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; struct list_head anno_list; struct list_head userspace_pm_local_addr_list; spinlock_t lock; /* protects the whole PM data */ struct_group(reset, u8 addr_signal; bool server_side; bool work_pending; bool accept_addr; bool accept_subflow; bool remote_deny_join_id0; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; u8 pm_type; u8 extra_subflows; u8 status; ); DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; }; struct mptcp_pm_local { struct mptcp_addr_info addr; u8 flags; int ifindex; }; struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; u8 flags; int ifindex; struct socket *lsk; }; struct mptcp_data_frag { struct list_head list; u64 data_seq; u16 data_len; u16 offset; u16 overhead; u16 already_sent; struct page *page; }; /* MPTCP connection sock */ struct mptcp_sock { /* inet_connection_sock must be the first member */ struct inet_connection_sock sk; u64 local_key; /* protected by the first subflow socket lock * lockless access read */ u64 remote_key; /* same as above */ u64 write_seq; u64 bytes_sent; u64 snd_nxt; u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; u64 bytes_retrans; u64 bytes_consumed; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; * recovery related fields are under data_lock * protection */ u64 bytes_acked; u64 snd_una; u64 wnd_end; u32 last_data_sent; u32 last_data_recv; u32 last_ack_recv; unsigned long timer_ival; u32 token; unsigned long flags; unsigned long cb_flags; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; bool rcv_data_fin; bool
snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; u8 pending_state; /* A subflow asked to set this sk_state, * protected by the msk data lock */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, nodelay:1, fastopening:1, in_accept_queue:1, free_first:1, rcvspace_init:1, fastclosing:1; u32 notsent_lowat; int keepalive_cnt; int keepalive_idle; int keepalive_intvl; int maxseg; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; struct sock *first; /* The mptcp ops can safely dereference, using suitable * ONCE annotation, the subflow outside the socket * lock as such sock is freed after close(). */ struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; struct { int space; /* bytes copied in last measurement window */ int copied; /* bytes copied in this measurement window */ u64 time; /* start time of measurement window */ u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; bool allow_subflows; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; spinlock_t fallback_lock; /* protects fallback, * allow_infinite_fallback and * allow_join */ struct list_head backlog_list; /* protected by the data lock */ u32 backlog_len; u32 backlog_unaccounted; }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) #define mptcp_next_subflow(__msk, __subflow) \ list_next_entry_circular(__subflow, &((__msk)->conn_list), node) extern struct genl_family mptcp_genl_family; static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); } #ifdef CONFIG_DEBUG_NET /* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */ #undef tcp_sk #define tcp_sk(ptr) ({ \ typeof(ptr) _ptr = (ptr); \ WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \ container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \ }) #define mptcp_sk(ptr) ({ \ typeof(ptr) _ptr = (ptr); \ WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \ container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \ }) #else /* !CONFIG_DEBUG_NET */ #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) #endif static inline int mptcp_win_from_space(const struct sock *sk, int space) { return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); } static inline int mptcp_space_from_win(const struct sock *sk, int win) { return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win); } static inline int __mptcp_space(const struct sock *sk) { return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - READ_ONCE(mptcp_sk(sk)->backlog_len) - sk_rmem_alloc_get(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); return msk->first_pending; } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *cur; cur = msk->first_pending; return list_is_last(&cur->list, &msk->rtx_queue) ? 
NULL : list_next_entry(cur, list); } static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; if (WARN_ON_ONCE(list_empty(&msk->rtx_queue))) return NULL; return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); if (msk->snd_una == msk->snd_nxt) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } struct csum_pseudo_header { __be64 data_seq; __be32 subflow_seq; __be16 data_len; __sum16 csum; }; struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, backup : 1, request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; u8 remote_id; u64 local_key; u64 idsn; u32 token; u32 ssn_offset; u64 thmac; u32 local_nonce; u32 remote_nonce; struct mptcp_sock *msk; struct hlist_nulls_node token_node; }; static inline struct mptcp_subflow_request_sock * mptcp_subflow_rsk(const struct request_sock *rsk) { return (struct mptcp_subflow_request_sock *)rsk; } struct mptcp_delegated_action { struct napi_struct napi; local_lock_t bh_lock; struct list_head head; }; DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); #define MPTCP_DELEGATE_SCHEDULED 0 #define MPTCP_DELEGATE_SEND 1 #define MPTCP_DELEGATE_ACK 2 #define MPTCP_DELEGATE_SNDBUF 3 #define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED)) /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; u64 remote_key; u64 idsn; u64 map_seq; u64 rcv_wnd_sent; u32 snd_isn; u32 token; u32 rel_write_seq; u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; __wsum map_data_csum; u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, map_csum_reqd : 1, map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ closing : 1, /* must not pass rx data to msk anymore */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ close_event_done : 1, /* has done the post-closed part */ mpc_drop : 1, /* the MPC option has been dropped in a rtx */ __unused : 8; bool data_avail; bool scheduled; bool pm_listener; /* a listener managed by the kernel PM? 
*/ bool fully_established; /* path validated */ u32 lent_mem_frag; u32 remote_nonce; u64 thmac; u32 local_nonce; u32 remote_token; union { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; u8 reset_reason:4; u8 stale_count; u32 subflow_id; long delegated_status; unsigned long fail_tout; ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ u32 setsockopt_seq; u32 stale_rcv_tstamp; int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf, * protected by the msk socket lock */ struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; void (*tcp_state_change)(struct sock *sk); void (*tcp_error_report)(struct sock *sk); struct rcu_head rcu; }; static inline struct mptcp_subflow_context * mptcp_subflow_ctx(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code */ return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; } static inline struct sock * mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) { return subflow->tcp_sock; } static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; WRITE_ONCE(subflow->local_id, -1); } /* Convert reset reasons in MPTCP to enum sk_rst_reason type */ static inline enum sk_rst_reason sk_rst_convert_mptcp_reason(u32 reason) { switch (reason) { case MPTCP_RST_EUNSPEC: return SK_RST_REASON_MPTCP_RST_EUNSPEC; case MPTCP_RST_EMPTCP: return SK_RST_REASON_MPTCP_RST_EMPTCP; case MPTCP_RST_ERESOURCE: return SK_RST_REASON_MPTCP_RST_ERESOURCE; case MPTCP_RST_EPROHIBIT: return SK_RST_REASON_MPTCP_RST_EPROHIBIT; case MPTCP_RST_EWQ2BIG: return SK_RST_REASON_MPTCP_RST_EWQ2BIG; case MPTCP_RST_EBADPERF: return SK_RST_REASON_MPTCP_RST_EBADPERF; case MPTCP_RST_EMIDDLEBOX: return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX; default: /* It should not happen, or else errors may occur * in the MPTCP layer */ return SK_RST_REASON_ERROR; } } static inline void mptcp_send_active_reset_reason(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); enum sk_rst_reason reason; reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); tcp_send_active_reset(sk, GFP_ATOMIC, reason); } /* Make the fwd mem carried by the given skb available to the msk, * to be paired with a previous mptcp_subflow_lend_fwdmem() before freeing * the skb or setting the skb ownership. */ static inline void mptcp_borrow_fwdmem(struct sock *sk, struct sk_buff *skb) { struct sock *ssk = skb->sk; /* The subflow just lent the skb fwd memory; if the subflow meanwhile * closed, mptcp_close_ssk() already released the ssk rcv memory.
*/ DEBUG_NET_WARN_ON_ONCE(skb->destructor); sk_forward_alloc_add(sk, skb->truesize); if (!ssk) return; atomic_sub(skb->truesize, &ssk->sk_rmem_alloc); skb->sk = NULL; } static inline void __mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow, int size) { int frag = (subflow->lent_mem_frag + size) & (PAGE_SIZE - 1); subflow->lent_mem_frag = frag; } static inline void mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow, struct sk_buff *skb) { __mptcp_subflow_lend_fwdmem(subflow, skb->truesize); skb->destructor = NULL; } static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq - subflow->ssn_offset - subflow->map_subflow_seq; } static inline u64 mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) { return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } void mptcp_subflow_process_delegated(struct sock *ssk, long actions); static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action) { long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action); struct mptcp_delegated_action *delegated; bool schedule; /* the caller held the subflow bh socket lock */ lockdep_assert_in_softirq(); /* The implied barrier pairs with tcp_release_cb_override() * mptcp_napi_poll(), and ensures the below list check sees list * updates done prior to delegated status bits changes */ old = set_mask_bits(&subflow->delegated_status, 0, set_bits); if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) { if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node))) return; local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); delegated = this_cpu_ptr(&mptcp_delegated_actions); schedule = list_empty(&delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head); local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); sock_hold(mptcp_subflow_tcp_sock(subflow)); if (schedule) napi_schedule(&delegated->napi); } } static inline struct mptcp_subflow_context * mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) { struct mptcp_subflow_context *ret; local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); if (list_empty(&delegated->head)) { local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return NULL; } ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); list_del_init(&ret->delegated_node); local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return ret; } void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t gfp); void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk); int mptcp_is_enabled(const struct net *net); unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_path_manager(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); void mptcp_active_disable(struct sock *sk); bool mptcp_active_should_disable(struct sock *ssk); void mptcp_active_enable(struct sock *sk); void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void 
mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); void mptcp_remote_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); struct mptcp_sched_ops *mptcp_sched_find(const char *name); int mptcp_validate_scheduler(struct mptcp_sched_ops *sched); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_sched_init(void); int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched); void mptcp_release_sched(struct mptcp_sock *msk); void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled); struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) { return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed); } static inline bool mptcp_epollin_ready(const struct sock *sk) { u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); if (!data_avail) return false; /* mptcp doesn't have to deal with small skbs in the receive queue, * as it can always coalesce them */ return (data_avail >= sk->sk_rcvlowat) || tcp_under_memory_pressure(sk); } int mptcp_set_rcvlowat(struct sock *sk, int val); static inline bool __tcp_can_send(const struct sock *ssk) { /* only send if our side has not closed yet */ return ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { /* can't send if JOIN hasn't completed yet (i.e. 
is usable for mptcp) */ if (subflow->request_join && !READ_ONCE(subflow->fully_established)) return false; return __tcp_can_send(mptcp_subflow_tcp_sock(subflow)); } void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); void mptcp_subflow_drop_ctx(struct sock *ssk); static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { sk->sk_data_ready = sock_def_readable; sk->sk_state_change = ctx->tcp_state_change; sk->sk_write_space = sk_stream_write_space; sk->sk_error_report = ctx->tcp_error_report; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone_init(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct sock *ssk, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) { if (!inet_csk(sk)->icsk_mtup.probe_timestamp) return; sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer); inet_csk(sk)->icsk_mtup.probe_timestamp = 0; } static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) { /* avoid 0 timestamp, as that means no close timeout */ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; } static inline void mptcp_start_tout_timer(struct sock *sk) { mptcp_set_close_tout(sk, tcp_jiffies32); mptcp_reset_tout_timer(mptcp_sk(sk), 0); } static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && READ_ONCE(mptcp_sk(sk)->fully_established); } void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option); u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq); static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) { if (use_64bit) return cur_seq; return __mptcp_expand_seq(old_seq, cur_seq); } void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { return READ_ONCE(msk->snd_data_fin_enable) && READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } static inline u32 mptcp_notsent_lowat(const struct sock *sk) { struct net *net = sock_net(sk); u32 val; val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) { const struct mptcp_sock *msk = mptcp_sk(sk); u32 notsent_bytes; notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); } static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) { return mptcp_stream_memory_free(sk, 
wake) && __sk_stream_is_writeable(sk, wake); } static inline void mptcp_write_space(struct sock *sk) { /* pairs with memory barrier in mptcp_poll */ smp_mb(); if (mptcp_stream_memory_free(sk, 1)) sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) { struct mptcp_subflow_context *subflow; int ssk_sndbuf, new_sndbuf; if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) return; new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]); mptcp_for_each_subflow(mptcp_sk(sk), subflow) { ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); subflow->cached_sndbuf = ssk_sndbuf; new_sndbuf += ssk_sndbuf; } /* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); mptcp_write_space(sk); } /* The caller holds both the msk socket and the subflow socket locks, * possibly under BH */ static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf) __mptcp_sync_sndbuf(sk); } /* the caller holds only the subflow socket lock, either in process or * BH context. Additionally this can be called under the msk data lock, * so we can't acquire such lock here: let the delegated action acquire * the needed locks in a suitable order. */ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf)) return; local_bh_disable(); mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF); local_bh_enable(); } #define MPTCP_TOKEN_MAX_RETRIES 4 void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) { mptcp_subflow_rsk(req)->token_node.pprev = NULL; } int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *ssk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); bool mptcp_token_exists(u32 token); struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); void mptcp_pm_destroy(struct mptcp_sock *msk); int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry); bool mptcp_pm_addr_families_match(const struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool
mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow); void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, bool prio, bool backup); void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id); void mptcp_pm_rm_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, struct mptcp_addr_info *rem, u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id); bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, struct genl_info *info); int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); /* the default path manager, used in mptcp_pm_unregister */ extern struct mptcp_pm_ops mptcp_pm_kernel; struct mptcp_pm_ops *mptcp_pm_find(const char *name); int mptcp_pm_register(struct mptcp_pm_ops *pm_ops); void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops); int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops); void mptcp_pm_get_available(char *buf, size_t maxlen); void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); int mptcp_pm_genl_fill_addr(struct sk_buff *msg, struct netlink_callback *cb, struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO)); } static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } 
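/*
 * Illustrative sketch (editor's addition): the mptcp_pm_should_*_signal()
 * helpers around this point all test individual bits of the pm.addr_signal
 * word against the mptcp_addr_signal_status enum, reading the word once.
 * A self-contained model of that idiom; DEMO_BIT() mirrors the kernel's
 * BIT() macro and the DEMO_* enumerators stand in for MPTCP_ADD_ADDR_SIGNAL
 * and friends.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_BIT(nr)	(1U << (nr))

enum { DEMO_ADD_ADDR_SIGNAL, DEMO_ADD_ADDR_ECHO, DEMO_RM_ADDR_SIGNAL };

/* true if either a fresh ADD_ADDR or an echo is pending */
static bool demo_should_add_signal(unsigned int addr_signal)
{
	return addr_signal & (DEMO_BIT(DEMO_ADD_ADDR_SIGNAL) |
			      DEMO_BIT(DEMO_ADD_ADDR_ECHO));
}

int main(void)
{
	unsigned int sig = DEMO_BIT(DEMO_ADD_ADDR_ECHO);

	printf("add-addr work pending: %d\n", demo_should_add_signal(sig)); /* 1 */
	sig &= ~DEMO_BIT(DEMO_ADD_ADDR_ECHO);	/* consume the echo */
	printf("add-addr work pending: %d\n", demo_should_add_signal(sig)); /* 0 */
	return 0;
}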
static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) { return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; } static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) { return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; } static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; if (family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; /* account for 2 trailing 'nop' options */ if (port) len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) { if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX) return -EINVAL; return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, unsigned int opt_size, unsigned int remaining, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { int local_id = READ_ONCE(subflow->local_id); if (local_id < 0) return 0; return local_id; } void __init mptcp_pm_kernel_register(void); void __init mptcp_pm_userspace_register(void); void __init mptcp_pm_nl_init(void); void mptcp_pm_worker(struct mptcp_sock *msk); void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk); u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk); u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk); /* called under PM lock */ static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk) { if (--msk->pm.extra_subflows < mptcp_pm_get_limit_extra_subflows(msk)) WRITE_ONCE(msk->pm.accept_subflow, true); } static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) { spin_lock_bh(&msk->pm.lock); __mptcp_pm_close_subflow(msk); spin_unlock_bh(&msk->pm.lock); } static inline bool mptcp_pm_add_addr_c_flag_case(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.remote_deny_join_id0) && msk->pm.local_addr_used == 0 && mptcp_pm_get_limit_add_addr_accepted(msk) == 0 && msk->pm.extra_subflows < 
mptcp_pm_get_limit_extra_subflows(msk); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); } void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk) { return test_bit(MPTCP_FALLBACK_DONE, &msk->flags); } static inline bool mptcp_check_fallback(const struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); return __mptcp_check_fallback(msk); } static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) { struct sock *ssk = READ_ONCE(msk->first); return ssk && ((1 << inet_sk_state_load(ssk)) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_LISTEN)); } bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib); static inline bool mptcp_try_fallback(struct sock *ssk, int fb_mib) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); if (!__mptcp_try_fallback(msk, fb_mib)) return false; if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; /* we are in an atomic (BH) scope, override ssk default for data * fin allocation */ ssk->sk_allocation = GFP_ATOMIC; ssk->sk_shutdown |= SEND_SHUTDOWN; tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } return true; } static inline void mptcp_early_fallback(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, int fb_mib) { subflow->request_mptcp = 0; WARN_ON_ONCE(!__mptcp_try_fallback(msk, fb_mib)); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { struct mptcp_ext *mpext; mpext = skb ? mptcp_get_ext(skb) : NULL; if (mpext && mpext->infinite_map) return true; return false; } static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) { return (subflow->request_mptcp || subflow->request_join); } static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); /* Note that the sk state implies !subflow->conn_finished. */ return sk->sk_state == TCP_SYN_RECV && is_active_ssk(subflow); } #ifdef CONFIG_SYN_COOKIES void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); void __init mptcp_join_cookie_init(void); #else static inline void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) {} static inline bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) { return false; } static inline void mptcp_join_cookie_init(void) {} #endif #endif /* __MPTCP_PROTOCOL_H */
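/*
 * Illustrative sketch (editor's addition): before64() in the header above
 * compares 64-bit data sequence numbers through a signed difference, which
 * keeps the ordering correct across wraparound; after64() is the same test
 * with swapped arguments. A standalone demonstration of why that works:
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool demo_before64(uint64_t seq1, uint64_t seq2)
{
	/* negative signed difference means seq1 precedes seq2 modulo 2^64 */
	return (int64_t)(seq1 - seq2) < 0;
}

#define demo_after64(seq2, seq1)	demo_before64(seq1, seq2)

int main(void)
{
	/* plain ordering */
	assert(demo_before64(1, 2));
	assert(demo_after64(2, 1));
	/* across wraparound: UINT64_MAX is "just before" 1 */
	assert(demo_before64(UINT64_MAX, 1));
	assert(!demo_before64(1, UINT64_MAX));
	return 0;
}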
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Berkeley style UIO structures - Alan Cox 1994. */ #ifndef __LINUX_UIO_H #define __LINUX_UIO_H #include <linux/kernel.h> #include <linux/mm_types.h> #include <linux/ucopysize.h> #include <uapi/linux/uio.h> struct page; struct folio_queue; typedef unsigned int __bitwise iov_iter_extraction_t; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; }; enum iter_type { /* iter types */ ITER_UBUF, ITER_IOVEC, ITER_BVEC, ITER_KVEC, ITER_FOLIOQ, ITER_XARRAY, ITER_DISCARD, }; #define ITER_SOURCE 1 // == WRITE #define ITER_DEST 0 // == READ struct iov_iter_state { size_t iov_offset; size_t count; unsigned long nr_segs; }; struct iov_iter { u8 iter_type; bool nofault; bool data_source; size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type * of iterator used. This means that you can use: * * &iter->__ubuf_iovec or iter->__iov * * interchangeably for the user_backed cases, hence simplifying * some of the cases that need to deal with both. */ union { /* * This really should be a const, but we cannot do that without * also modifying any of the zero-filling iter init functions. * Leave it non-const for now, but it should be treated as such.
*/ struct iovec __ubuf_iovec; struct { union { /* use iter_iov() to get the current vec */ const struct iovec *__iov; const struct kvec *kvec; const struct bio_vec *bvec; const struct folio_queue *folioq; struct xarray *xarray; void __user *ubuf; }; size_t count; }; }; union { unsigned long nr_segs; u8 folioq_slot; loff_t xarray_start; }; }; typedef __u16 uio_meta_flags_t; struct uio_meta { uio_meta_flags_t flags; u16 app_tag; u64 seed; struct iov_iter iter; }; static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) return (const struct iovec *) &iter->__ubuf_iovec; return iter->__iov; } #define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) static inline size_t iter_iov_len(const struct iov_iter *i) { if (i->iter_type == ITER_UBUF) return i->count; return iter_iov(i)->iov_len - i->iov_offset; } static inline enum iter_type iov_iter_type(const struct iov_iter *i) { return i->iter_type; } static inline void iov_iter_save_state(struct iov_iter *iter, struct iov_iter_state *state) { state->iov_offset = iter->iov_offset; state->count = iter->count; state->nr_segs = iter->nr_segs; } static inline bool iter_is_ubuf(const struct iov_iter *i) { return iov_iter_type(i) == ITER_UBUF; } static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; } static inline bool iov_iter_is_kvec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_KVEC; } static inline bool iov_iter_is_bvec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_BVEC; } static inline bool iov_iter_is_discard(const struct iov_iter *i) { return iov_iter_type(i) == ITER_DISCARD; } static inline bool iov_iter_is_folioq(const struct iov_iter *i) { return iov_iter_type(i) == ITER_FOLIOQ; } static inline bool iov_iter_is_xarray(const struct iov_iter *i) { return iov_iter_type(i) == ITER_XARRAY; } static inline unsigned char iov_iter_rw(const struct iov_iter *i) { return i->data_source ? WRITE : READ; } static inline bool user_backed_iter(const struct iov_iter *i) { return iter_is_ubuf(i) || iter_is_iovec(i); } /* * Total number of bytes covered by an iovec. * * NOTE that it is not safe to use this function until all the iovec's * segment lengths have been validated, because the individual lengths can * overflow a size_t when added together.
*/ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) { unsigned long seg; size_t ret = 0; for (seg = 0; seg < nr_segs; seg++) ret += iov[seg].iov_len; return ret; } void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { return copy_page_to_iter(&folio->page, offset, bytes, i); } static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { return copy_page_from_iter(&folio->page, offset, bytes, i); } size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, true)) return _copy_to_iter(addr, bytes, i); return 0; } static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, false)) return _copy_from_iter(addr, bytes, i); return 0; } static __always_inline __must_check bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_to_iter(addr, bytes, i); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter(addr, bytes, i); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (check_copy_size(addr, bytes, false)) return _copy_from_iter_nocache(addr, bytes, i); return 0; } static __always_inline __must_check bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter_nocache(addr, bytes, i); if (likely(copied == bytes)) return true; iov_iter_revert(i, copied); return false; } #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /* * Note, users like pmem that depend on the stricter semantics of * _copy_from_iter_flushcache() than _copy_from_iter_nocache() must check for * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the * destination is flushed from the cache on return. 
*/ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #else #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #else #define _copy_mc_to_iter _copy_to_iter #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec, unsigned long nr_segs, size_t count); void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, const struct folio_queue *folioq, unsigned int first_slot, unsigned int offset, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's an upper limit for it. Passing it a value * greater than the amount of data in iov_iter is fine - it'll just do * nothing in that case. */ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) { /* * count doesn't have to fit in size_t - comparison extends both * operands to u64 here and any value that would be truncated by * conversion in assignment is by definition greater than all * values of size_t, including old i->count. */ if (i->count > count) i->count = count; } /* * reexpand a previously truncated iterator; count must be no more than how much * we had shrunk it.
*/ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } static inline int iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) { size_t shorted = 0; int npages; if (iov_iter_count(i) > max_bytes) { shorted = iov_iter_count(i) - max_bytes; iov_iter_truncate(i, max_bytes); } npages = iov_iter_npages(i, maxpages); if (shorted) iov_iter_reexpand(i, iov_iter_count(i) + shorted); return npages; } struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); ssize_t import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i); ssize_t __import_iovec(int type, const struct iovec __user *uvec, unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, struct iov_iter *i, bool compat); int import_ubuf(int type, void __user *buf, size_t len, struct iov_iter *i); static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, void __user *buf, size_t count) { WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, .data_source = direction, .ubuf = buf, .count = count, .nr_segs = 1 }; } /* Flags for iov_iter_get/extract_pages*() */ /* Allow P2PDMA on the extracted pages */ #define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0); /** * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained * @iter: The iterator * * Examine the iterator and indicate by returning true or false as to how, if * at all, pages extracted from the iterator will be retained by the extraction * function. * * %true indicates that the pages will have a pin placed in them that the * caller must unpin. This must be done for DMA/async DIO to force fork() * to forcibly copy a page for the child (the parent must retain the original * page). * * %false indicates that no measures are taken and that it's up to the caller * to retain the pages. */ static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter) { return user_backed_iter(iter); } struct sg_table; ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len, struct sg_table *sgtable, unsigned int sg_max, iov_iter_extraction_t extraction_flags); #endif
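/*
 * Illustrative sketch (editor's addition): the comment above iov_length()
 * warns that summing unvalidated iov_len fields can overflow size_t. The
 * userland model below shows the overflow-checked variant that warning
 * implies; demo_iov_length_checked() is hypothetical, not a kernel API,
 * and struct iovec here comes from the POSIX <sys/uio.h>.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

static bool demo_iov_length_checked(const struct iovec *iov,
				    unsigned long nr_segs, size_t *total)
{
	size_t ret = 0;
	unsigned long seg;

	for (seg = 0; seg < nr_segs; seg++) {
		if (iov[seg].iov_len > SIZE_MAX - ret)
			return false;		/* sum would overflow size_t */
		ret += iov[seg].iov_len;
	}
	*total = ret;
	return true;
}

int main(void)
{
	struct iovec v[2] = {
		{ .iov_base = 0, .iov_len = 4 },
		{ .iov_base = 0, .iov_len = 8 },
	};
	size_t total;

	return demo_iov_length_checked(v, 2, &total) && total == 12 ? 0 : 1;
}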
// SPDX-License-Identifier: GPL-2.0-or-later /* * ip_vs_ftp.c: IPVS ftp application module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: * * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference * is that the ip_vs_ftp module handles the reverse direction to ip_masq_ftp.
* * IP_MASQ_FTP ftp masquerading module * * Version: @(#)ip_masq_ftp.c 0.04 02/05/96 * * Author: Wouter Gadeyne */ #define pr_fmt(fmt) "IPVS: " fmt #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/ctype.h> #include <linux/inet.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> #include <linux/unaligned.h> #include <net/ip_vs.h> #define SERVER_STRING_PASV "227 " #define CLIENT_STRING_PORT "PORT" #define SERVER_STRING_EPSV "229 " #define CLIENT_STRING_EPRT "EPRT" enum { IP_VS_FTP_ACTIVE = 0, IP_VS_FTP_PORT = 0, IP_VS_FTP_PASV, IP_VS_FTP_EPRT, IP_VS_FTP_EPSV, }; static bool exiting_module; /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper * First port is set to the default port. */ static unsigned int ports_count = 1; static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0}; module_param_array(ports, ushort, &ports_count, 0444); MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); static char *ip_vs_ftp_data_ptr(struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) { struct tcphdr *th = (struct tcphdr *)((char *)skb->data + ipvsh->len); if ((th->doff << 2) < sizeof(struct tcphdr)) return NULL; return (char *)th + (th->doff << 2); } static int ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) { /* We use connection tracking for the command connection */ cp->flags |= IP_VS_CONN_F_NFCT; return 0; } static int ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) { return 0; } /* Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started * with the "pattern". <addr,port> is in network order. * Parse extended format depending on ext. In this case addr can be pre-set. */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, const char *pattern, size_t plen, char skip, bool ext, int mode, union nf_inet_addr *addr, __be16 *port, __u16 af, char **start, char **end) { char *s, c; unsigned char p[6]; char edelim; __u16 hport; int i = 0; if (data_limit - data < plen) { /* check if there is partial match */ if (strncasecmp(data, pattern, data_limit - data) == 0) return -1; else return 0; } if (strncasecmp(data, pattern, plen) != 0) { return 0; } s = data + plen; if (skip) { bool found = false; for (;; s++) { if (s == data_limit) return -1; if (!found) { /* "(" is optional for non-extended format, * so catch the start of IPv4 address */ if (!ext && isdigit(*s)) break; if (*s == skip) found = true; } else if (*s != skip) { break; } } } /* Old IPv4-only format? 
 */
	if (!ext) {
		p[0] = 0;
		for (data = s; ; data++) {
			if (data == data_limit)
				return -1;
			c = *data;
			if (isdigit(c)) {
				p[i] = p[i]*10 + c - '0';
			} else if (c == ',' && i < 5) {
				i++;
				p[i] = 0;
			} else {
				/* unexpected character or terminator */
				break;
			}
		}

		if (i != 5)
			return -1;

		*start = s;
		*end = data;
		addr->ip = get_unaligned((__be32 *) p);
		*port = get_unaligned((__be16 *) (p + 4));
		return 1;
	}
	if (s == data_limit)
		return -1;
	*start = s;
	edelim = *s++;
	if (edelim < 33 || edelim > 126)
		return -1;
	if (s == data_limit)
		return -1;
	if (*s == edelim) {
		/* Address family is usually missing for EPSV response */
		if (mode != IP_VS_FTP_EPSV)
			return -1;
		s++;
		if (s == data_limit)
			return -1;
		/* Then address should be missing too */
		if (*s != edelim)
			return -1;
		/* Caller can pre-set addr, if needed */
		s++;
	} else {
		const char *ep;

		/* We allow address only from same family */
		if (af == AF_INET6 && *s != '2')
			return -1;
		if (af == AF_INET && *s != '1')
			return -1;
		s++;
		if (s == data_limit)
			return -1;
		if (*s != edelim)
			return -1;
		s++;
		if (s == data_limit)
			return -1;
		if (af == AF_INET6) {
			if (in6_pton(s, data_limit - s, (u8 *)addr,
				     edelim, &ep) <= 0)
				return -1;
		} else {
			if (in4_pton(s, data_limit - s, (u8 *)addr,
				     edelim, &ep) <= 0)
				return -1;
		}
		s = (char *) ep;
		if (s == data_limit)
			return -1;
		if (*s != edelim)
			return -1;
		s++;
	}
	for (hport = 0; ; s++) {
		if (s == data_limit)
			return -1;
		if (!isdigit(*s))
			break;
		hport = hport * 10 + *s - '0';
	}
	if (s == data_limit || !hport || *s != edelim)
		return -1;
	s++;
	*end = s;
	*port = htons(hport);
	return 1;
}

/* Look at outgoing ftp packets to catch the response to a PASV/EPSV command
 * from the server (inside-to-outside).
 * When we see one, we build a connection entry with the client address,
 * client port 0 (unknown at the moment), the server address and the
 * server port.  Mark the current connection entry as a control channel
 * of the new entry. All this work is just so that the data connection
 * can be scheduled to the right server later.
 *
 * The outgoing packet should be something like
 *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
 * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
 * The extended format for the EPSV response usually provides only the port:
 *   "229 Entering Extended Passive Mode (|||ppp|)"
 */
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
			 struct sk_buff *skb, int *diff,
			 struct ip_vs_iphdr *ipvsh)
{
	char *data, *data_limit;
	char *start, *end;
	union nf_inet_addr from;
	__be16 port;
	struct ip_vs_conn *n_cp;
	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
	unsigned int buf_len;
	int ret = 0;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	*diff = 0;

	/* Only useful for established sessions */
	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
		return 1;

	/* Linear packets are much easier to deal with.
*/ if (skb_ensure_writable(skb, skb->len)) return 0; if (cp->app_data == (void *) IP_VS_FTP_PASV) { data = ip_vs_ftp_data_ptr(skb, ipvsh); data_limit = skb_tail_pointer(skb); if (!data || data >= data_limit) return 1; if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING_PASV, sizeof(SERVER_STRING_PASV)-1, '(', false, IP_VS_FTP_PASV, &from, &port, cp->af, &start, &end) != 1) return 1; IP_VS_DBG(7, "PASV response (%pI4:%u) -> %pI4:%u detected\n", &from.ip, ntohs(port), &cp->caddr.ip, 0); } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) { data = ip_vs_ftp_data_ptr(skb, ipvsh); data_limit = skb_tail_pointer(skb); if (!data || data >= data_limit) return 1; /* Usually, data address is not specified but * we support different address, so pre-set it. */ from = cp->daddr; if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING_EPSV, sizeof(SERVER_STRING_EPSV)-1, '(', true, IP_VS_FTP_EPSV, &from, &port, cp->af, &start, &end) != 1) return 1; IP_VS_DBG_BUF(7, "EPSV response (%s:%u) -> %s:%u detected\n", IP_VS_DBG_ADDR(cp->af, &from), ntohs(port), IP_VS_DBG_ADDR(cp->af, &cp->caddr), 0); } else { return 1; } /* Now update or create a connection entry for it */ { struct ip_vs_conn_param p; ip_vs_conn_fill_param(cp->ipvs, cp->af, ipvsh->protocol, &from, port, &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; ip_vs_conn_fill_param(cp->ipvs, cp->af, ipvsh->protocol, &cp->caddr, 0, &cp->vaddr, port, &p); n_cp = ip_vs_conn_new(&p, cp->af, &from, port, IP_VS_CONN_F_NO_CPORT | IP_VS_CONN_F_NFCT, cp->dest, skb->mark); if (!n_cp) return 0; /* add its controller */ ip_vs_control_add(n_cp, cp); } /* Replace the old passive address with the new one */ if (cp->app_data == (void *) IP_VS_FTP_PASV) { from.ip = n_cp->vaddr.ip; port = n_cp->vport; snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u", ((unsigned char *)&from.ip)[0], ((unsigned char *)&from.ip)[1], ((unsigned char *)&from.ip)[2], ((unsigned char *)&from.ip)[3], ntohs(port) >> 8, ntohs(port) & 0xFF); } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) { from = n_cp->vaddr; port = n_cp->vport; /* Only port, client will use VIP for the data connection */ snprintf(buf, sizeof(buf), "|||%u|", ntohs(port)); } else { *buf = 0; } buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); if (ct) { bool mangled; /* If mangling fails this function will return 0 * which will cause the packet to be dropped. * Mangling can only fail under memory pressure, * hopefully it will succeed on the retransmitted * packet. */ mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, ipvsh->len, start - data, end - start, buf, buf_len); if (mangled) { ip_vs_nfct_expect_related(skb, ct, n_cp, ipvsh->protocol, 0, 0); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_UNNECESSARY; /* csum is updated */ ret = 1; } } /* Not setting 'diff' is intentional, otherwise the sequence * would be adjusted twice. */ cp->app_data = (void *) IP_VS_FTP_ACTIVE; ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return ret; } /* Look at incoming ftp packets to catch the PASV/PORT/EPRT/EPSV command * (outside-to-inside). * * The incoming packet having the PORT command should be something like * "PORT xxx,xxx,xxx,xxx,ppp,ppp\n". * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number. * In this case, we create a connection entry using the client address and * port, so that the active ftp data connection from the server can reach * the client. 
 * Extended format:
 *	"EPSV\r\n" when client requests server address from same family
 *	"EPSV 1\r\n" when client requests IPv4 server address
 *	"EPSV 2\r\n" when client requests IPv6 server address
 *	"EPSV ALL\r\n" - not supported
 *	EPRT with specified delimiter (ASCII 33..126), "|" by default:
 *	"EPRT |1|IPv4ADDR|PORT|\r\n" when client provides IPv4 addrport
 *	"EPRT |2|IPv6ADDR|PORT|\r\n" when client provides IPv6 addrport
 */
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
			struct sk_buff *skb, int *diff,
			struct ip_vs_iphdr *ipvsh)
{
	char *data, *data_start, *data_limit;
	char *start, *end;
	union nf_inet_addr to;
	__be16 port;
	struct ip_vs_conn *n_cp;

	/* no diff required for incoming packets */
	*diff = 0;

	/* Only useful for established sessions */
	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
		return 1;

	/* Linear packets are much easier to deal with. */
	if (skb_ensure_writable(skb, skb->len))
		return 0;

	data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
	data_limit = skb_tail_pointer(skb);
	if (!data || data >= data_limit)
		return 1;

	while (data <= data_limit - 6) {
		if (cp->af == AF_INET &&
		    strncasecmp(data, "PASV\r\n", 6) == 0) {
			/* Passive mode on */
			IP_VS_DBG(7, "got PASV at %td of %td\n",
				  data - data_start,
				  data_limit - data_start);
			cp->app_data = (void *) IP_VS_FTP_PASV;
			return 1;
		}

		/* EPSV or EPSV<space><net-prt> */
		if (strncasecmp(data, "EPSV", 4) == 0 &&
		    (data[4] == ' ' || data[4] == '\r')) {
			if (data[4] == ' ') {
				char proto = data[5];

				if (data > data_limit - 7 || data[6] != '\r')
					return 1;

#ifdef CONFIG_IP_VS_IPV6
				if (cp->af == AF_INET6 && proto == '2') {
				} else
#endif
				if (cp->af == AF_INET && proto == '1') {
				} else {
					return 1;
				}
			}
			/* Extended Passive mode on */
			IP_VS_DBG(7, "got EPSV at %td of %td\n",
				  data - data_start,
				  data_limit - data_start);
			cp->app_data = (void *) IP_VS_FTP_EPSV;
			return 1;
		}
		data++;
	}

	/*
	 * To support a virtual FTP server, the scenario is as follows:
	 *       FTP client ----> Load Balancer ----> FTP server
	 * First detect the port number in the application data,
	 * then create a new connection entry for the incoming data
	 * connection.
*/ if (cp->af == AF_INET && ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING_PORT, sizeof(CLIENT_STRING_PORT)-1, ' ', false, IP_VS_FTP_PORT, &to, &port, cp->af, &start, &end) == 1) { IP_VS_DBG(7, "PORT %pI4:%u detected\n", &to.ip, ntohs(port)); /* Now update or create a connection entry for it */ IP_VS_DBG(7, "protocol %s %pI4:%u %pI4:%u\n", ip_vs_proto_name(ipvsh->protocol), &to.ip, ntohs(port), &cp->vaddr.ip, ntohs(cp->vport)-1); } else if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING_EPRT, sizeof(CLIENT_STRING_EPRT)-1, ' ', true, IP_VS_FTP_EPRT, &to, &port, cp->af, &start, &end) == 1) { IP_VS_DBG_BUF(7, "EPRT %s:%u detected\n", IP_VS_DBG_ADDR(cp->af, &to), ntohs(port)); /* Now update or create a connection entry for it */ IP_VS_DBG_BUF(7, "protocol %s %s:%u %s:%u\n", ip_vs_proto_name(ipvsh->protocol), IP_VS_DBG_ADDR(cp->af, &to), ntohs(port), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport)-1); } else { return 1; } /* Passive mode off */ cp->app_data = (void *) IP_VS_FTP_ACTIVE; { struct ip_vs_conn_param p; ip_vs_conn_fill_param(cp->ipvs, cp->af, ipvsh->protocol, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { n_cp = ip_vs_conn_new(&p, cp->af, &cp->daddr, htons(ntohs(cp->dport)-1), IP_VS_CONN_F_NFCT, cp->dest, skb->mark); if (!n_cp) return 0; /* add its controller */ ip_vs_control_add(n_cp, cp); } } /* * Move tunnel to listen state */ ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return 1; } static struct ip_vs_app ip_vs_ftp = { .name = "ftp", .type = IP_VS_APP_TYPE_FTP, .protocol = IPPROTO_TCP, .module = THIS_MODULE, .incs_list = LIST_HEAD_INIT(ip_vs_ftp.incs_list), .init_conn = ip_vs_ftp_init_conn, .done_conn = ip_vs_ftp_done_conn, .bind_conn = NULL, .unbind_conn = NULL, .pkt_out = ip_vs_ftp_out, .pkt_in = ip_vs_ftp_in, }; /* * per netns ip_vs_ftp initialization */ static int __net_init __ip_vs_ftp_init(struct net *net) { int i, ret; struct ip_vs_app *app; struct netns_ipvs *ipvs = net_ipvs(net); if (!ipvs) return -ENOENT; app = register_ip_vs_app(ipvs, &ip_vs_ftp); if (IS_ERR(app)) return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) continue; ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); if (ret) goto err_unreg; } return 0; err_unreg: unregister_ip_vs_app(ipvs, &ip_vs_ftp); return ret; } /* * netns exit */ static void __ip_vs_ftp_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); if (!ipvs || !exiting_module) return; unregister_ip_vs_app(ipvs, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { .init = __ip_vs_ftp_init, .exit = __ip_vs_ftp_exit, }; static int __init ip_vs_ftp_init(void) { /* rcu_barrier() is called by netns on error */ return register_pernet_subsys(&ip_vs_ftp_ops); } /* * ip_vs_ftp finish. */ static void __exit ip_vs_ftp_exit(void) { exiting_module = true; unregister_pernet_subsys(&ip_vs_ftp_ops); /* rcu_barrier() is called by netns */ } module_init(ip_vs_ftp_init); module_exit(ip_vs_ftp_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs ftp helper");
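The PASV reply that ip_vs_ftp_out() rewrites, "227 Entering Passive Mode (h1,h2,h3,h4,p1,p2)", encodes the data port as two decimal bytes (port = p1 * 256 + p2). A stand-alone user-space sketch of the decoding, with a hypothetical helper name and a made-up sample reply; the kernel parser above is more lenient (the "(" is optional):

#include <stdio.h>

/* Hypothetical user-space decoder for a PASV reply. Returns 0 on success. */
static int pasv_decode(const char *reply, unsigned char ip[4],
		       unsigned short *port)
{
	unsigned int h1, h2, h3, h4, p1, p2;
	const char *p = reply;

	/* Skip to the '('; the kernel parser also accepts a missing '('
	 * and simply scans for the first digit. */
	while (*p && *p != '(')
		p++;
	if (*p == '(')
		p++;

	if (sscanf(p, "%u,%u,%u,%u,%u,%u", &h1, &h2, &h3, &h4, &p1, &p2) != 6)
		return -1;
	if ((h1 | h2 | h3 | h4 | p1 | p2) > 255)	/* each field is one byte */
		return -1;

	ip[0] = h1; ip[1] = h2; ip[2] = h3; ip[3] = h4;
	*port = (unsigned short)(p1 * 256 + p2);	/* high byte, low byte */
	return 0;
}

int main(void)
{
	unsigned char ip[4];
	unsigned short port;

	if (!pasv_decode("227 Entering Passive Mode (192,168,0,1,19,137)",
			 ip, &port))
		printf("%u.%u.%u.%u:%u\n", ip[0], ip[1], ip[2], ip[3], port);
	/* prints 192.168.0.1:5001 (19 * 256 + 137) */
	return 0;
}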
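The extended EPSV reply handled above carries only the port, wrapped in an arbitrary printable delimiter ('|' by default): "229 Entering Extended Passive Mode (|||ppp|)". A companion user-space sketch under the same caveats (hypothetical helper, made-up reply; the kernel parser additionally validates the trailing delimiter and the optional address family field):

#include <stdio.h>
#include <string.h>

/* Hypothetical decoder for an EPSV reply: "<d><d><d>port<d>" inside
 * parentheses, with the protocol and address fields left empty. */
static int epsv_decode(const char *reply, unsigned short *port)
{
	const char *p = strchr(reply, '(');
	unsigned int v;
	char d;

	if (!p || !p[1])
		return -1;
	d = p[1];			/* delimiter, printable ASCII 33..126 */
	if (d < 33 || d > 126)
		return -1;
	if (p[2] != d || p[3] != d)	/* protocol and address left empty */
		return -1;
	if (sscanf(p + 4, "%u", &v) != 1 || !v || v > 65535)
		return -1;
	*port = (unsigned short)v;
	return 0;
}

int main(void)
{
	unsigned short port;

	if (!epsv_decode("229 Entering Extended Passive Mode (|||6446|)", &port))
		printf("data port %u\n", port);	/* prints "data port 6446" */
	return 0;
}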
// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/power/hibernate.c - Hibernation (a.k.a. suspend-to-disk) support.
 *
 * Copyright (c) 2003 Patrick Mochel
 * Copyright (c) 2003 Open Source Development Lab
 * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz>
 * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
* Copyright (C) 2012 Bojan Smojver <bojan@rexursive.com> */ #define pr_fmt(fmt) "PM: hibernation: " fmt #include <crypto/acompress.h> #include <linux/blkdev.h> #include <linux/export.h> #include <linux/suspend.h> #include <linux/reboot.h> #include <linux/string.h> #include <linux/device.h> #include <linux/async.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/pm.h> #include <linux/nmi.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> #include <linux/gfp.h> #include <linux/syscore_ops.h> #include <linux/ctype.h> #include <linux/ktime.h> #include <linux/security.h> #include <linux/secretmem.h> #include <trace/events/power.h> #include "power.h" static int nocompress; static int noresume; static int nohibernate; static int resume_wait; static unsigned int resume_delay; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; __visible int in_suspend __nosavedata; static char hibernate_compressor[CRYPTO_MAX_ALG_NAME] = CONFIG_HIBERNATION_DEF_COMP; /* * Compression/decompression algorithm to be used while saving/loading * image to/from disk. This would later be used in 'kernel/power/swap.c' * to allocate comp streams. */ char hib_comp_algo[CRYPTO_MAX_ALG_NAME]; enum { HIBERNATION_INVALID, HIBERNATION_PLATFORM, HIBERNATION_SHUTDOWN, HIBERNATION_REBOOT, #ifdef CONFIG_SUSPEND HIBERNATION_SUSPEND, #endif HIBERNATION_TEST_RESUME, /* keep last */ __HIBERNATION_AFTER_LAST }; #define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) #define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) static int hibernation_mode = HIBERNATION_SHUTDOWN; bool freezer_test_done; static const struct platform_hibernation_ops *hibernation_ops; static atomic_t hibernate_atomic = ATOMIC_INIT(1); #ifdef CONFIG_SUSPEND /** * pm_hibernation_mode_is_suspend - Check if hibernation has been set to suspend */ bool pm_hibernation_mode_is_suspend(void) { return hibernation_mode == HIBERNATION_SUSPEND; } EXPORT_SYMBOL_GPL(pm_hibernation_mode_is_suspend); #endif bool hibernate_acquire(void) { return atomic_add_unless(&hibernate_atomic, -1, 0); } void hibernate_release(void) { atomic_inc(&hibernate_atomic); } bool hibernation_in_progress(void) { return !atomic_read(&hibernate_atomic); } bool hibernation_available(void) { return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION) && !secretmem_active() && !cxl_mem_active(); } /** * hibernation_set_ops - Set the global hibernate operations. * @ops: Hibernation operations to use in subsequent hibernation transitions. 
 */
void hibernation_set_ops(const struct platform_hibernation_ops *ops)
{
	unsigned int sleep_flags;

	if (ops && !(ops->begin && ops->end && ops->pre_snapshot
	    && ops->prepare && ops->finish && ops->enter && ops->pre_restore
	    && ops->restore_cleanup && ops->leave)) {
		WARN_ON(1);
		return;
	}

	sleep_flags = lock_system_sleep();

	hibernation_ops = ops;
	if (ops)
		hibernation_mode = HIBERNATION_PLATFORM;
	else if (hibernation_mode == HIBERNATION_PLATFORM)
		hibernation_mode = HIBERNATION_SHUTDOWN;

	unlock_system_sleep(sleep_flags);
}
EXPORT_SYMBOL_GPL(hibernation_set_ops);

static bool entering_platform_hibernation;

bool system_entering_hibernation(void)
{
	return entering_platform_hibernation;
}
EXPORT_SYMBOL(system_entering_hibernation);

#ifdef CONFIG_PM_DEBUG
static unsigned int pm_test_delay = 5;
module_param(pm_test_delay, uint, 0644);
MODULE_PARM_DESC(pm_test_delay,
		 "Number of seconds to wait before resuming from hibernation test");

static void hibernation_debug_sleep(void)
{
	pr_info("hibernation debug: Waiting for %d second(s).\n",
		pm_test_delay);
	mdelay(pm_test_delay * 1000);
}

static int hibernation_test(int level)
{
	if (pm_test_level == level) {
		hibernation_debug_sleep();
		return 1;
	}
	return 0;
}
#else /* !CONFIG_PM_DEBUG */
static int hibernation_test(int level) { return 0; }
#endif /* !CONFIG_PM_DEBUG */

/**
 * platform_begin - Call platform to start hibernation.
 * @platform_mode: Whether or not to use the platform driver.
 */
static int platform_begin(int platform_mode)
{
	return (platform_mode && hibernation_ops) ?
		hibernation_ops->begin(PMSG_FREEZE) : 0;
}

/**
 * platform_end - Call platform to finish transition to the working state.
 * @platform_mode: Whether or not to use the platform driver.
 */
static void platform_end(int platform_mode)
{
	if (platform_mode && hibernation_ops)
		hibernation_ops->end();
}

/**
 * platform_pre_snapshot - Call platform to prepare the machine for hibernation.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Use the platform driver to prepare the system for creating a hibernate image,
 * if so configured, and return an error code if that fails.
 */
static int platform_pre_snapshot(int platform_mode)
{
	return (platform_mode && hibernation_ops) ?
		hibernation_ops->pre_snapshot() : 0;
}

/**
 * platform_leave - Call platform to prepare a transition to the working state.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Use the platform driver to prepare the machine for switching to the
 * normal mode of operation.
 *
 * This routine is called on one CPU with interrupts disabled.
 */
static void platform_leave(int platform_mode)
{
	if (platform_mode && hibernation_ops)
		hibernation_ops->leave();
}

/**
 * platform_finish - Call platform to switch the system to the working state.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Use the platform driver to switch the machine to the normal mode of
 * operation.
 *
 * This routine must be called after platform_prepare().
 */
static void platform_finish(int platform_mode)
{
	if (platform_mode && hibernation_ops)
		hibernation_ops->finish();
}

/**
 * platform_pre_restore - Prepare for hibernate image restoration.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Use the platform driver to prepare the system for resume from a hibernation
 * image.
 *
 * If the restore fails after this function has been called,
 * platform_restore_cleanup() must be called.
 */
static int platform_pre_restore(int platform_mode)
{
	return (platform_mode && hibernation_ops) ?
hibernation_ops->pre_restore() : 0; } /** * platform_restore_cleanup - Switch to the working state after failing restore. * @platform_mode: Whether or not to use the platform driver. * * Use the platform driver to switch the system to the normal mode of operation * after a failing restore. * * If platform_pre_restore() has been called before the failing restore, this * function must be called too, regardless of the result of * platform_pre_restore(). */ static void platform_restore_cleanup(int platform_mode) { if (platform_mode && hibernation_ops) hibernation_ops->restore_cleanup(); } /** * platform_recover - Recover from a failure to suspend devices. * @platform_mode: Whether or not to use the platform driver. */ static void platform_recover(int platform_mode) { if (platform_mode && hibernation_ops && hibernation_ops->recover) hibernation_ops->recover(); } /** * swsusp_show_speed - Print time elapsed between two events during hibernation. * @start: Starting event. * @stop: Final event. * @nr_pages: Number of memory pages processed between @start and @stop. * @msg: Additional diagnostic message to print. */ void swsusp_show_speed(ktime_t start, ktime_t stop, unsigned nr_pages, char *msg) { ktime_t diff; u64 elapsed_centisecs64; unsigned int centisecs; unsigned int k; unsigned int kps; diff = ktime_sub(stop, start); elapsed_centisecs64 = ktime_divns(diff, 10*NSEC_PER_MSEC); centisecs = elapsed_centisecs64; if (centisecs == 0) centisecs = 1; /* avoid div-by-zero */ k = nr_pages * (PAGE_SIZE / 1024); kps = (k * 100) / centisecs; pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", msg, k, centisecs / 100, centisecs % 100, kps / 1000, (kps % 1000) / 10); } __weak int arch_resume_nosmt(void) { return 0; } /** * create_image - Create a hibernation image. * @platform_mode: Whether or not to use the platform driver. * * Execute device drivers' "late" and "noirq" freeze callbacks, create a * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. * * Control reappears in this routine after the subsequent restore. */ static int create_image(int platform_mode) { int error; error = dpm_suspend_end(PMSG_FREEZE); if (error) { pr_err("Some devices failed to power down, aborting\n"); return error; } error = platform_pre_snapshot(platform_mode); if (error || hibernation_test(TEST_PLATFORM)) goto Platform_finish; error = pm_sleep_disable_secondary_cpus(); if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; local_irq_disable(); system_state = SYSTEM_SUSPEND; error = syscore_suspend(); if (error) { pr_err("Some system devices failed to power down, aborting\n"); goto Enable_irqs; } if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) goto Power_up; in_suspend = 1; save_processor_state(); trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); error = swsusp_arch_suspend(); /* Restore control flow magically appears here */ restore_processor_state(); trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); if (error) pr_err("Error %d creating image\n", error); if (!in_suspend) { events_check_enabled = false; clear_or_poison_free_pages(); } platform_leave(platform_mode); Power_up: syscore_resume(); Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: pm_sleep_enable_secondary_cpus(); /* Allow architectures to do nosmt-specific post-resume dances */ if (!in_suspend) error = arch_resume_nosmt(); Platform_finish: platform_finish(platform_mode); dpm_resume_start(in_suspend ? (error ? 
			PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
	return error;
}

static void shrink_shmem_memory(void)
{
	struct sysinfo info;
	unsigned long nr_shmem_pages, nr_freed_pages;

	si_meminfo(&info);
	nr_shmem_pages = info.sharedram; /* current page count used for shmem */
	/*
	 * The intent is to reclaim all shmem pages. Though shrink_all_memory() can
	 * only reclaim about half of them, it's enough for creating the hibernation
	 * image.
	 */
	nr_freed_pages = shrink_all_memory(nr_shmem_pages);
	pr_debug("requested to reclaim %lu shmem pages, actually freed %lu pages\n",
		 nr_shmem_pages, nr_freed_pages);
}

/**
 * hibernation_snapshot - Quiesce devices and create a hibernation image.
 * @platform_mode: If set, use platform driver to prepare for the transition.
 *
 * This routine must be called with system_transition_mutex held.
 */
int hibernation_snapshot(int platform_mode)
{
	pm_message_t msg;
	int error;

	pm_suspend_clear_flags();
	error = platform_begin(platform_mode);
	if (error)
		goto Close;

	/* Preallocate image memory before shutting down devices. */
	error = hibernate_preallocate_memory();
	if (error)
		goto Close;

	error = freeze_kernel_threads();
	if (error)
		goto Cleanup;

	if (hibernation_test(TEST_FREEZER)) {
		/*
		 * Indicate to the caller that we are returning due to a
		 * successful freezer test.
		 */
		freezer_test_done = true;
		goto Thaw;
	}

	error = dpm_prepare(PMSG_FREEZE);
	if (error) {
		dpm_complete(PMSG_RECOVER);
		goto Thaw;
	}

	/*
	 * Device drivers may move lots of data to shmem in dpm_prepare(). The
	 * shmem pages will use lots of system memory, causing hibernation
	 * image creation to fail due to insufficient free memory.
	 * This call is to force-flush the shmem pages to the swap disk and
	 * reclaim the system memory so that image creation can succeed.
	 */
	shrink_shmem_memory();

	console_suspend_all();
	pm_restrict_gfp_mask();

	error = dpm_suspend(PMSG_FREEZE);

	if (error || hibernation_test(TEST_DEVICES))
		platform_recover(platform_mode);
	else
		error = create_image(platform_mode);

	/*
	 * In the case that we call create_image() above, the control
	 * returns here (1) after the image has been created or the
	 * image creation has failed and (2) after a successful restore.
	 */

	/* We may need to release the preallocated image pages here. */
	if (error || !in_suspend)
		swsusp_free();

	msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE;
	dpm_resume(msg);

	if (error || !in_suspend)
		pm_restore_gfp_mask();

	console_resume_all();
	dpm_complete(msg);

 Close:
	platform_end(platform_mode);
	return error;

 Thaw:
	thaw_kernel_threads();
 Cleanup:
	swsusp_free();
	goto Close;
}

int __weak hibernate_resume_nonboot_cpu_disable(void)
{
	return suspend_disable_secondary_cpus();
}

/**
 * resume_target_kernel - Restore system state from a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
 * contents of highmem that have not been restored yet from the image and run
 * the low-level code that will restore the remaining contents of memory and
 * switch to the just restored target kernel.
*/ static int resume_target_kernel(bool platform_mode) { int error; error = dpm_suspend_end(PMSG_QUIESCE); if (error) { pr_err("Some devices failed to power down, aborting resume\n"); return error; } error = platform_pre_restore(platform_mode); if (error) goto Cleanup; cpuidle_pause(); error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; local_irq_disable(); system_state = SYSTEM_SUSPEND; error = syscore_suspend(); if (error) goto Enable_irqs; save_processor_state(); error = restore_highmem(); if (!error) { error = swsusp_arch_resume(); /* * The code below is only ever reached in case of a failure. * Otherwise, execution continues at the place where * swsusp_arch_suspend() was called. */ BUG_ON(!error); /* * This call to restore_highmem() reverts the changes made by * the previous one. */ restore_highmem(); } /* * The only reason why swsusp_arch_resume() can fail is memory being * very tight, so we have to free it as soon as we can to avoid * subsequent failures. */ swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); syscore_resume(); Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: pm_sleep_enable_secondary_cpus(); Cleanup: platform_restore_cleanup(platform_mode); dpm_resume_start(PMSG_RECOVER); return error; } /** * hibernation_restore - Quiesce devices and restore from a hibernation image. * @platform_mode: If set, use platform driver to prepare for the transition. * * This routine must be called with system_transition_mutex held. If it is * successful, control reappears in the restored target kernel in * hibernation_snapshot(). */ int hibernation_restore(int platform_mode) { int error; pm_prepare_console(); console_suspend_all(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); /* * The above should either succeed and jump to the new kernel, * or return with an error. Otherwise things are just * undefined, so let's be paranoid. */ BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); console_resume_all(); pm_restore_console(); return error; } /** * hibernation_platform_enter - Power off the system using the platform driver. 
*/ int hibernation_platform_enter(void) { int error; if (!hibernation_ops) return -ENOSYS; /* * We have cancelled the power transition by running * hibernation_ops->finish() before saving the image, so we should let * the firmware know that we're going to enter the sleep state after all */ error = hibernation_ops->begin(PMSG_HIBERNATE); if (error) goto Close; entering_platform_hibernation = true; console_suspend_all(); error = dpm_suspend_start(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) hibernation_ops->recover(); goto Resume_devices; } error = dpm_suspend_end(PMSG_HIBERNATE); if (error) goto Resume_devices; error = hibernation_ops->prepare(); if (error) goto Platform_finish; error = pm_sleep_disable_secondary_cpus(); if (error) goto Enable_cpus; local_irq_disable(); system_state = SYSTEM_SUSPEND; error = syscore_suspend(); if (error) goto Enable_irqs; if (pm_wakeup_pending()) { error = -EAGAIN; goto Power_up; } hibernation_ops->enter(); /* We should never get here */ while (1); Power_up: syscore_resume(); Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); Enable_cpus: pm_sleep_enable_secondary_cpus(); Platform_finish: hibernation_ops->finish(); dpm_resume_start(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; dpm_resume_end(PMSG_RESTORE); console_resume_all(); Close: hibernation_ops->end(); return error; } /** * power_down - Shut the machine down for hibernation. * * Use the platform driver, if configured, to put the system into the sleep * state corresponding to hibernation, or try to power it off or reboot, * depending on the value of hibernation_mode. */ static void power_down(void) { int error; #ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { error = suspend_devices_and_enter(mem_sleep_current); if (!error) goto exit; hibernation_mode = hibernation_ops ? HIBERNATION_PLATFORM : HIBERNATION_SHUTDOWN; } #endif switch (hibernation_mode) { case HIBERNATION_REBOOT: kernel_restart(NULL); break; case HIBERNATION_PLATFORM: error = hibernation_platform_enter(); if (error == -EAGAIN || error == -EBUSY) { events_check_enabled = false; pr_info("Wakeup event detected during hibernation, rolling back.\n"); goto exit; } fallthrough; case HIBERNATION_SHUTDOWN: if (kernel_can_power_off()) { entering_platform_hibernation = true; kernel_power_off(); entering_platform_hibernation = false; } break; } kernel_halt(); /* * Valid image is on the disk, if we continue we risk serious data * corruption after resume. */ pr_crit("Power down manually\n"); while (1) cpu_relax(); exit: /* Restore swap signature. */ error = swsusp_unmark(); if (error) pr_err("Swap will be unusable! Try swapon -a.\n"); } static int load_image_and_restore(void) { int error; unsigned int flags; pm_pr_dbg("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); if (error) { swsusp_close(); goto Unlock; } error = swsusp_read(&flags); swsusp_close(); if (!error) error = hibernation_restore(flags & SF_PLATFORM_MODE); pr_err("Failed to load image, recovering.\n"); swsusp_free(); free_basic_memory_bitmaps(); Unlock: unlock_device_hotplug(); return error; } #define COMPRESSION_ALGO_LZO "lzo" #define COMPRESSION_ALGO_LZ4 "lz4" /** * hibernate - Carry out system hibernation, including saving the image. 
*/ int hibernate(void) { bool snapshot_test = false; unsigned int sleep_flags; int error; if (!hibernation_available()) { pm_pr_dbg("Hibernation not available.\n"); return -EPERM; } /* * Query for the compression algorithm support if compression is enabled. */ if (!nocompress) { strscpy(hib_comp_algo, hibernate_compressor); if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) { pr_err("%s compression is not available\n", hib_comp_algo); return -EOPNOTSUPP; } } sleep_flags = lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; goto Unlock; } pr_info("hibernation entry\n"); pm_prepare_console(); error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); if (error) goto Restore; error = pm_sleep_fs_sync(); if (error) goto Notify; filesystems_freeze(filesystem_freeze_enabled); error = freeze_processes(); if (error) goto Exit; lock_device_hotplug(); /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); if (error) goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error || freezer_test_done) goto Free_bitmaps; if (in_suspend) { unsigned int flags = 0; if (hibernation_mode == HIBERNATION_PLATFORM) flags |= SF_PLATFORM_MODE; if (nocompress) { flags |= SF_NOCOMPRESS_MODE; } else { flags |= SF_CRC32_MODE; /* * By default, LZO compression is enabled. Use SF_COMPRESSION_ALG_LZ4 * to override this behaviour and use LZ4. * * Refer kernel/power/power.h for more details */ if (!strcmp(hib_comp_algo, COMPRESSION_ALGO_LZ4)) flags |= SF_COMPRESSION_ALG_LZ4; else flags |= SF_COMPRESSION_ALG_LZO; } pm_pr_dbg("Writing hibernation image.\n"); error = swsusp_write(flags); swsusp_free(); if (!error) { if (hibernation_mode == HIBERNATION_TEST_RESUME) snapshot_test = true; else power_down(); } in_suspend = 0; pm_restore_gfp_mask(); } else { pm_pr_dbg("Hibernation image restored successfully.\n"); } Free_bitmaps: free_basic_memory_bitmaps(); Thaw: unlock_device_hotplug(); if (snapshot_test) { pm_pr_dbg("Checking hibernation image\n"); error = swsusp_check(false); if (!error) error = load_image_and_restore(); } thaw_processes(); /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: filesystems_thaw(); Notify: pm_notifier_call_chain(PM_POST_HIBERNATION); Restore: pm_restore_console(); hibernate_release(); Unlock: unlock_system_sleep(sleep_flags); pr_info("hibernation exit\n"); return error; } /** * hibernate_quiet_exec - Execute a function with all devices frozen. * @func: Function to execute. * @data: Data pointer to pass to @func. * * Return the @func return value or an error code if it cannot be executed. 
*/ int hibernate_quiet_exec(int (*func)(void *data), void *data) { unsigned int sleep_flags; int error; sleep_flags = lock_system_sleep(); if (!hibernate_acquire()) { error = -EBUSY; goto unlock; } pm_prepare_console(); error = pm_notifier_call_chain_robust(PM_HIBERNATION_PREPARE, PM_POST_HIBERNATION); if (error) goto restore; filesystems_freeze(filesystem_freeze_enabled); error = freeze_processes(); if (error) goto exit; lock_device_hotplug(); pm_suspend_clear_flags(); error = platform_begin(true); if (error) goto thaw; error = freeze_kernel_threads(); if (error) goto thaw; error = dpm_prepare(PMSG_FREEZE); if (error) goto dpm_complete; console_suspend_all(); error = dpm_suspend(PMSG_FREEZE); if (error) goto dpm_resume; error = dpm_suspend_end(PMSG_FREEZE); if (error) goto dpm_resume; error = platform_pre_snapshot(true); if (error) goto skip; error = func(data); skip: platform_finish(true); dpm_resume_start(PMSG_THAW); dpm_resume: dpm_resume(PMSG_THAW); console_resume_all(); dpm_complete: dpm_complete(PMSG_THAW); thaw_kernel_threads(); thaw: platform_end(true); unlock_device_hotplug(); thaw_processes(); exit: filesystems_thaw(); pm_notifier_call_chain(PM_POST_HIBERNATION); restore: pm_restore_console(); hibernate_release(); unlock: unlock_system_sleep(sleep_flags); return error; } EXPORT_SYMBOL_GPL(hibernate_quiet_exec); static int __init find_resume_device(void) { if (!strlen(resume_file)) return -ENOENT; pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); if (resume_delay) { pr_info("Waiting %dsec before reading resume device ...\n", resume_delay); ssleep(resume_delay); } /* Check if the device is there */ if (!early_lookup_bdev(resume_file, &swsusp_resume_device)) return 0; /* * Some device discovery might still be in progress; we need to wait for * this to finish. */ wait_for_device_probe(); if (resume_wait) { while (early_lookup_bdev(resume_file, &swsusp_resume_device)) msleep(10); async_synchronize_full(); } return early_lookup_bdev(resume_file, &swsusp_resume_device); } static int software_resume(void) { int error; pm_pr_dbg("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); pm_pr_dbg("Looking for hibernation image.\n"); mutex_lock(&system_transition_mutex); error = swsusp_check(true); if (error) goto Unlock; /* * Check if the hibernation image is compressed. If so, query for * the algorithm support. 
*/ if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) { if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4) strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4); else strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO); if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) { pr_err("%s compression is not available\n", hib_comp_algo); error = -EOPNOTSUPP; goto Unlock; } } /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; swsusp_close(); goto Unlock; } pr_info("resume from hibernation\n"); pm_prepare_console(); error = pm_notifier_call_chain_robust(PM_RESTORE_PREPARE, PM_POST_RESTORE); if (error) goto Restore; filesystems_freeze(filesystem_freeze_enabled); pm_pr_dbg("Preparing processes for hibernation restore.\n"); error = freeze_processes(); if (error) { filesystems_thaw(); goto Close_Finish; } error = freeze_kernel_threads(); if (error) { thaw_processes(); filesystems_thaw(); goto Close_Finish; } error = load_image_and_restore(); thaw_processes(); filesystems_thaw(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); Restore: pm_restore_console(); pr_info("resume failed (%d)\n", error); hibernate_release(); /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&system_transition_mutex); pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: swsusp_close(); goto Finish; } /** * software_resume_initcall - Resume from a saved hibernation image. * * This routine is called as a late initcall, when all devices have been * discovered and initialized already. * * The image reading code is called to see if there is a hibernation image * available for reading. If that is the case, devices are quiesced and the * contents of memory is restored from the saved image. * * If this is successful, control reappears in the restored target kernel in * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine * attempts to recover gracefully and make the kernel return to the normal mode * of operation. */ static int __init software_resume_initcall(void) { /* * If the user said "noresume".. bail out early. */ if (noresume || !hibernation_available()) return 0; if (!swsusp_resume_device) { int error = find_resume_device(); if (error) return error; } return software_resume(); } late_initcall_sync(software_resume_initcall); static const char * const hibernation_modes[] = { [HIBERNATION_PLATFORM] = "platform", [HIBERNATION_SHUTDOWN] = "shutdown", [HIBERNATION_REBOOT] = "reboot", #ifdef CONFIG_SUSPEND [HIBERNATION_SUSPEND] = "suspend", #endif [HIBERNATION_TEST_RESUME] = "test_resume", }; /* * /sys/power/disk - Control hibernation mode. * * Hibernation can be handled in several ways. There are a few different ways * to put the system into the sleep state: using the platform driver (e.g. ACPI * or other hibernation_ops), powering it off or rebooting it (for testing * mostly). * * The sysfs file /sys/power/disk provides an interface for selecting the * hibernation mode to use. Reading from this file causes the available modes * to be printed. There are 3 modes that can be supported: * * 'platform' * 'shutdown' * 'reboot' * * If a platform hibernation driver is in use, 'platform' will be supported * and will be used by default. Otherwise, 'shutdown' will be used by default. * The selected option (i.e. the one corresponding to the current value of * hibernation_mode) is enclosed by a square bracket. 
* * To select a given hibernation mode it is necessary to write the mode's * string representation (as returned by reading from /sys/power/disk) back * into /sys/power/disk. */ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; int i; if (!hibernation_available()) return sysfs_emit(buf, "[disabled]\n"); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (!hibernation_modes[i]) continue; switch (i) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: #ifdef CONFIG_SUSPEND case HIBERNATION_SUSPEND: #endif case HIBERNATION_TEST_RESUME: break; case HIBERNATION_PLATFORM: if (hibernation_ops) break; /* not a valid mode, continue with loop */ continue; } if (i == hibernation_mode) count += sysfs_emit_at(buf, count, "[%s] ", hibernation_modes[i]); else count += sysfs_emit_at(buf, count, "%s ", hibernation_modes[i]); } /* Convert the last space to a newline if needed. */ if (count > 0) buf[count - 1] = '\n'; return count; } static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int mode = HIBERNATION_INVALID; unsigned int sleep_flags; int error = 0; int len; char *p; int i; if (!hibernation_available()) return -EPERM; p = memchr(buf, '\n', n); len = p ? p - buf : n; sleep_flags = lock_system_sleep(); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (len == strlen(hibernation_modes[i]) && !strncmp(buf, hibernation_modes[i], len)) { mode = i; break; } } if (mode != HIBERNATION_INVALID) { switch (mode) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: #ifdef CONFIG_SUSPEND case HIBERNATION_SUSPEND: #endif case HIBERNATION_TEST_RESUME: hibernation_mode = mode; break; case HIBERNATION_PLATFORM: if (hibernation_ops) hibernation_mode = mode; else error = -EINVAL; } } else error = -EINVAL; if (!error) pm_pr_dbg("Hibernation mode set to '%s'\n", hibernation_modes[mode]); unlock_system_sleep(sleep_flags); return error ? 
error : n; } power_attr(disk); static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int sleep_flags; int len = n; char *name; dev_t dev; int error; if (!hibernation_available()) return n; if (len && buf[len-1] == '\n') len--; name = kstrndup(buf, len, GFP_KERNEL); if (!name) return -ENOMEM; error = lookup_bdev(name, &dev); if (error) { unsigned maj, min, offset; char *p, dummy; error = 0; if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { dev = MKDEV(maj, min); if (maj != MAJOR(dev) || min != MINOR(dev)) error = -EINVAL; } else { dev = new_decode_dev(simple_strtoul(name, &p, 16)); if (*p) error = -EINVAL; } } kfree(name); if (error) return error; sleep_flags = lock_system_sleep(); swsusp_resume_device = dev; unlock_system_sleep(sleep_flags); pm_pr_dbg("Configured hibernation resume from disk to %u\n", swsusp_resume_device); noresume = 0; software_resume(); return n; } power_attr(resume); static ssize_t resume_offset_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%llu\n", (unsigned long long)swsusp_resume_block); } static ssize_t resume_offset_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long long offset; int rc; rc = kstrtoull(buf, 0, &offset); if (rc) return rc; swsusp_resume_block = offset; return n; } power_attr(resume_offset); static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", image_size); } static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long size; if (sscanf(buf, "%lu", &size) == 1) { image_size = size; return n; } return -EINVAL; } power_attr(image_size); static ssize_t reserved_size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", reserved_size); } static ssize_t reserved_size_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long size; if (sscanf(buf, "%lu", &size) == 1) { reserved_size = size; return n; } return -EINVAL; } power_attr(reserved_size); static struct attribute *g[] = { &disk_attr.attr, &resume_offset_attr.attr, &resume_attr.attr, &image_size_attr.attr, &reserved_size_attr.attr, NULL, }; static const struct attribute_group attr_group = { .attrs = g, }; static int __init pm_disk_init(void) { return sysfs_create_group(power_kobj, &attr_group); } core_initcall(pm_disk_init); static int __init resume_setup(char *str) { if (noresume) return 1; strscpy(resume_file, str); return 1; } static int __init resume_offset_setup(char *str) { unsigned long long offset; if (noresume) return 1; if (sscanf(str, "%llu", &offset) == 1) swsusp_resume_block = offset; return 1; } static int __init hibernate_setup(char *str) { if (!strncmp(str, "noresume", 8)) { noresume = 1; } else if (!strncmp(str, "nocompress", 10)) { nocompress = 1; } else if (!strncmp(str, "no", 2)) { noresume = 1; nohibernate = 1; } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && !strncmp(str, "protect_image", 13)) { enable_restore_image_protection(); } return 1; } static int __init noresume_setup(char *str) { noresume = 1; return 1; } static int __init 
resumewait_setup(char *str) { resume_wait = 1; return 1; } static int __init resumedelay_setup(char *str) { int rc = kstrtouint(str, 0, &resume_delay); if (rc) pr_warn("resumedelay: bad option string '%s'\n", str); return 1; } static int __init nohibernate_setup(char *str) { noresume = 1; nohibernate = 1; return 1; } static const char * const comp_alg_enabled[] = { #if IS_ENABLED(CONFIG_CRYPTO_LZO) COMPRESSION_ALGO_LZO, #endif #if IS_ENABLED(CONFIG_CRYPTO_LZ4) COMPRESSION_ALGO_LZ4, #endif }; static int hibernate_compressor_param_set(const char *compressor, const struct kernel_param *kp) { int index, ret; if (!mutex_trylock(&system_transition_mutex)) return -EBUSY; index = sysfs_match_string(comp_alg_enabled, compressor); if (index >= 0) { ret = param_set_copystring(comp_alg_enabled[index], kp); if (!ret) strscpy(hib_comp_algo, comp_alg_enabled[index]); } else { ret = index; } mutex_unlock(&system_transition_mutex); if (ret) pr_debug("Cannot set specified compressor %s\n", compressor); return ret; } static const struct kernel_param_ops hibernate_compressor_param_ops = { .set = hibernate_compressor_param_set, .get = param_get_string, }; static struct kparam_string hibernate_compressor_param_string = { .maxlen = sizeof(hibernate_compressor), .string = hibernate_compressor, }; module_param_cb(compressor, &hibernate_compressor_param_ops, &hibernate_compressor_param_string, 0644); MODULE_PARM_DESC(compressor, "Compression algorithm to be used with hibernation"); __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); __setup("nohibernate", nohibernate_setup);
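The /sys/power/disk contract documented above (read it to list the modes, with the current hibernation_mode bracketed; write a mode name back to select it) can be exercised from user space. A minimal sketch, assuming root and a kernel built with hibernation support; the choice of "shutdown" here is arbitrary:

#include <stdio.h>

int main(void)
{
	char modes[128];
	FILE *f = fopen("/sys/power/disk", "r");

	if (!f)
		return 1;
	if (!fgets(modes, sizeof(modes), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);
	/* e.g. "[platform] shutdown reboot suspend test_resume";
	 * the bracketed entry is the current hibernation_mode. */
	printf("available: %s", modes);

	f = fopen("/sys/power/disk", "w");
	if (!f)
		return 1;
	/* disk_store() compares the text up to '\n' against the mode names */
	if (fputs("shutdown\n", f) == EOF) {
		fclose(f);
		return 1;
	}
	return fclose(f) ? 1 : 0;
}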
// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 2015 Red Hat GmbH
 * Author: Florian Westphal <fw@strlen.de>
 */

#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
#include <linux/siphash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>

#define NFT_TRACETYPE_LL_HSIZE		20
#define NFT_TRACETYPE_NETWORK_HSIZE	40
#define NFT_TRACETYPE_TRANSPORT_HSIZE	20

DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);

static int trace_fill_header(struct sk_buff *nlskb, u16 type,
			     const struct sk_buff *skb,
			     int off, unsigned int len)
{
	struct nlattr *nla;

	if (len == 0)
		return 0;

	nla = nla_reserve(nlskb, type, len);
	if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
		return -1;

	return 0;
}

static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
				   const struct sk_buff *skb)
{
	struct vlan_ethhdr veth;
	int off;

	BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);

	off = skb_mac_header(skb) - skb->data;
	if (off != -ETH_HLEN)
		return -1;

	if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
		return -1;

	veth.h_vlan_proto = skb->vlan_proto;
	veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
	veth.h_vlan_encapsulated_proto = skb->protocol;

	return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
}

static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
				  const struct net_device *indev,
				  const struct net_device *outdev)
{
	if (indev) {
		if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
				 htonl(indev->ifindex)))
			return -1;

		if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
				 htons(indev->type)))
			return -1;
	}

	if (outdev) {
		if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
				 htonl(outdev->ifindex)))
			return -1;

		if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
				 htons(outdev->type)))
			return -1;
	}

	return 0;
}

static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
				 const struct sk_buff *skb)
{
	const struct nf_ct_hook *ct_hook;
	enum ip_conntrack_info
ctinfo; const struct nf_conn *ct; u32 state; ct_hook = rcu_dereference(nf_ct_hook); if (!ct_hook) return 0; ct = nf_ct_get(skb, &ctinfo); if (!ct) { if (ctinfo != IP_CT_UNTRACKED) /* not seen by conntrack or invalid */ return 0; state = NF_CT_STATE_UNTRACKED_BIT; } else { state = NF_CT_STATE_BIT(ctinfo); } if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATE, htonl(state))) return -1; if (ct) { u32 id = ct_hook->get_id(&ct->ct_general); u32 status = READ_ONCE(ct->status); u8 dir = CTINFO2DIR(ctinfo); if (nla_put_u8(nlskb, NFTA_TRACE_CT_DIRECTION, dir)) return -1; if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id)) return -1; /* Kernel implementation detail, withhold this from userspace for now */ status &= ~IPS_NAT_CLASH; if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status))) return -1; } return 0; } static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, const struct nft_pktinfo *pkt) { const struct sk_buff *skb = pkt->skb; int off = skb_network_offset(skb); unsigned int len, nh_end; nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len; len = min_t(unsigned int, nh_end - skb_network_offset(skb), NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; if (pkt->flags & NFT_PKTINFO_L4PROTO) { len = min_t(unsigned int, skb->len - nft_thoff(pkt), NFT_TRACETYPE_TRANSPORT_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, nft_thoff(pkt), len)) return -1; } if (!skb_mac_header_was_set(skb)) return 0; if (skb_vlan_tag_get(skb)) return nf_trace_fill_ll_header(nlskb, skb); off = skb_mac_header(skb) - skb->data; len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, skb, off, len); } static int nf_trace_fill_rule_info(struct sk_buff *nlskb, const struct nft_verdict *verdict, const struct nft_rule_dp *rule, const struct nft_traceinfo *info) { if (!rule || rule->is_last) return 0; /* a continue verdict with ->type == RETURN means that this is * an implicit return (end of chain reached). * * Since no rule matched, the ->rule pointer is invalid. 
*/ if (info->type == NFT_TRACETYPE_RETURN && verdict->code == NFT_CONTINUE) return 0; return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, cpu_to_be64(rule->handle), NFTA_TRACE_PAD); } static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict, struct nft_traceinfo *info) { switch (info->type) { case NFT_TRACETYPE_RETURN: case NFT_TRACETYPE_RULE: break; default: return false; } switch (verdict->code) { case NFT_JUMP: case NFT_GOTO: break; default: return false; } return true; } static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule, const struct nft_traceinfo *info) { const struct nft_rule_dp_last *last; if (!rule) return &info->basechain->chain; while (!rule->is_last) rule = nft_rule_next(rule); last = (const struct nft_rule_dp_last *)rule; if (WARN_ON_ONCE(!last->chain)) return &info->basechain->chain; return last->chain; } void nft_trace_notify(const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_rule_dp *rule, struct nft_traceinfo *info) { const struct nft_chain *chain; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; u32 mark = 0; u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; chain = nft_trace_get_chain(rule, info); size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(strlen(chain->table->name)) + nla_total_size(strlen(chain->name)) + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ nla_total_size(sizeof(u32)) + /* verdict code */ nla_total_size(sizeof(u32)) + /* ct id */ nla_total_size(sizeof(u8)) + /* ct direction */ nla_total_size(sizeof(u32)) + /* ct state */ nla_total_size(sizeof(u32)) + /* ct status */ nla_total_size(sizeof(u32)) + /* trace id */ nla_total_size(NFT_TRACETYPE_LL_HSIZE) + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + nla_total_size(sizeof(u32)) + /* iif */ nla_total_size(sizeof(__be16)) + /* iiftype */ nla_total_size(sizeof(u32)) + /* oif */ nla_total_size(sizeof(__be16)) + /* oiftype */ nla_total_size(sizeof(u32)) + /* mark */ nla_total_size(sizeof(u32)) + /* nfproto */ nla_total_size(sizeof(u32)); /* policy */ if (nft_trace_have_verdict_chain(verdict, info)) size += nla_total_size(strlen(verdict->chain->name)); /* jump target */ skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) return; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family, NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure; if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name)) goto nla_put_failure; if (nf_trace_fill_rule_info(skb, verdict, rule, info)) goto nla_put_failure; switch (info->type) { case NFT_TRACETYPE_UNSPEC: case __NFT_TRACETYPE_MAX: break; case NFT_TRACETYPE_RETURN: case NFT_TRACETYPE_RULE: { unsigned int v; if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict)) goto nla_put_failure; /* pkt->skb undefined iff NF_STOLEN, disable dump */ v = verdict->code & NF_VERDICT_MASK; if (v == NF_STOLEN) info->packet_dumped = true; else mark = pkt->skb->mark; break; } case NFT_TRACETYPE_POLICY: mark = pkt->skb->mark; if 
(nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; } if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure; if (!info->packet_dumped) { if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt))) goto nla_put_failure; if (nf_trace_fill_pkt_info(skb, pkt)) goto nla_put_failure; if (nf_trace_fill_ct_info(skb, pkt->skb)) goto nla_put_failure; info->packet_dumped = true; } nlmsg_end(skb, nlh); nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); return; nla_put_failure: WARN_ON_ONCE(1); kfree_skb(skb); } void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_chain *chain) { static siphash_key_t trace_key __read_mostly; struct sk_buff *skb = pkt->skb; info->basechain = nft_base_chain(chain); info->trace = true; info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; net_get_random_once(&trace_key, sizeof(trace_key)); info->skbid = (u32)siphash_3u32(hash32_ptr(skb), skb_get_hash_net(nft_net(pkt), skb), skb->skb_iif, &trace_key); }
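A note on nft_trace_init() above: the trace ID is derived by siphashing a few cheap per-packet inputs with a random key generated once at first use, so every trace message for one skb carries the same ID while the hashed kernel pointer stays unrecoverable to userspace. A minimal sketch of that scheme follows; demo_pkt_id() and its inputs are illustrative stand-ins, not kernel API.

#include <linux/hash.h>
#include <linux/net.h>
#include <linux/siphash.h>

static u32 demo_pkt_id(const void *obj, u32 flow_hash, u32 ifindex)
{
	static siphash_key_t demo_key __read_mostly;

	/* Fill the key from the RNG exactly once; later calls are cheap. */
	net_get_random_once(&demo_key, sizeof(demo_key));

	/* hash32_ptr() folds the object address down to 32 bits; the
	 * keyed siphash keeps the original pointer unrecoverable.
	 */
	return (u32)siphash_3u32(hash32_ptr(obj), flow_hash, ifindex,
				 &demo_key);
}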
// SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/netfilter.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/percpu.h> #include <linux/netdevice.h> #include <linux/security.h> #include <net/net_namespace.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <net/netfilter/nf_log.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <linux/rculist_nulls.h> static bool enable_hooks __read_mostly; MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks"); module_param(enable_hooks, bool, 0000); unsigned int nf_conntrack_net_id __read_mostly; #ifdef CONFIG_NF_CONNTRACK_PROCFS void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *l4proto) { switch (tuple->src.l3num) { case NFPROTO_IPV4: seq_printf(s, "src=%pI4 dst=%pI4 ", &tuple->src.u3.ip, &tuple->dst.u3.ip); break; case NFPROTO_IPV6: seq_printf(s, "src=%pI6 dst=%pI6 ", tuple->src.u3.ip6, tuple->dst.u3.ip6); break; default: break; } switch (l4proto->l4proto) { case IPPROTO_ICMP: seq_printf(s, "type=%u code=%u id=%u ", tuple->dst.u.icmp.type, tuple->dst.u.icmp.code, ntohs(tuple->src.u.icmp.id)); break; case IPPROTO_TCP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.tcp.port), ntohs(tuple->dst.u.tcp.port)); break; case IPPROTO_UDPLITE: case IPPROTO_UDP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.udp.port), ntohs(tuple->dst.u.udp.port)); break; case IPPROTO_SCTP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.sctp.port), ntohs(tuple->dst.u.sctp.port)); break; case IPPROTO_ICMPV6: seq_printf(s, "type=%u code=%u id=%u ", tuple->dst.u.icmp.type, tuple->dst.u.icmp.code, ntohs(tuple->src.u.icmp.id)); break; case IPPROTO_GRE: seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key),
ntohs(tuple->dst.u.gre.key)); break; default: break; } } EXPORT_SYMBOL_GPL(print_tuple); struct ct_iter_state { struct seq_net_private p; struct hlist_nulls_head *hash; unsigned int htable_size; unsigned int skip_elems; unsigned int bucket; u_int64_t time_now; }; static struct nf_conntrack_tuple_hash *ct_get_next(const struct net *net, struct ct_iter_state *st) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; unsigned int i; for (i = st->bucket; i < st->htable_size; i++) { unsigned int skip = 0; restart: hlist_nulls_for_each_entry_rcu(h, n, &st->hash[i], hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); struct hlist_nulls_node *tmp = n; if (!net_eq(net, nf_ct_net(ct))) continue; if (++skip <= st->skip_elems) continue; /* h should be returned, skip to nulls marker. */ while (!is_a_nulls(tmp)) tmp = rcu_dereference(hlist_nulls_next_rcu(tmp)); /* check if h is still linked to hash[i] */ if (get_nulls_value(tmp) != i) { skip = 0; goto restart; } st->skip_elems = skip; st->bucket = i; return h; } skip = 0; if (get_nulls_value(n) != i) goto restart; st->skip_elems = 0; } st->bucket = i; return NULL; } static void *ct_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct ct_iter_state *st = seq->private; struct net *net = seq_file_net(seq); st->time_now = ktime_get_real_ns(); rcu_read_lock(); nf_conntrack_get_ht(&st->hash, &st->htable_size); if (*pos == 0) { st->skip_elems = 0; st->bucket = 0; } else if (st->skip_elems) { /* resume from last dumped entry */ st->skip_elems--; } return ct_get_next(net, st); } static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct ct_iter_state *st = s->private; struct net *net = seq_file_net(s); (*pos)++; return ct_get_next(net, st); } static void ct_seq_stop(struct seq_file *s, void *v) __releases(RCU) { rcu_read_unlock(); } #ifdef CONFIG_NF_CONNTRACK_SECMARK static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { struct lsm_context ctx; int ret; ret = security_secid_to_secctx(ct->secmark, &ctx); if (ret < 0) return; seq_printf(s, "secctx=%s ", ctx.context); security_release_secctx(&ctx); } #else static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { } #endif #ifdef CONFIG_NF_CONNTRACK_ZONES static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, int dir) { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); if (zone->dir != dir) return; switch (zone->dir) { case NF_CT_DEFAULT_ZONE_DIR: seq_printf(s, "zone=%u ", zone->id); break; case NF_CT_ZONE_DIR_ORIG: seq_printf(s, "zone-orig=%u ", zone->id); break; case NF_CT_ZONE_DIR_REPL: seq_printf(s, "zone-reply=%u ", zone->id); break; default: break; } } #else static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, int dir) { } #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { struct ct_iter_state *st = s->private; struct nf_conn_tstamp *tstamp; s64 delta_time; tstamp = nf_conn_tstamp_find(ct); if (tstamp) { delta_time = st->time_now - tstamp->start; if (delta_time > 0) delta_time = div_s64(delta_time, NSEC_PER_SEC); else delta_time = 0; seq_printf(s, "delta-time=%llu ", (unsigned long long)delta_time); } return; } #else static inline void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { } #endif static const char* l3proto_name(u16 proto) { switch (proto) { case AF_INET: return "ipv4"; case AF_INET6: return "ipv6"; } return "unknown"; } static const char* l4proto_name(u16 proto) { 
switch (proto) { case IPPROTO_ICMP: return "icmp"; case IPPROTO_TCP: return "tcp"; case IPPROTO_UDP: return "udp"; case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; case IPPROTO_ICMPV6: return "icmpv6"; } return "unknown"; } static void seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) { struct nf_conn_acct *acct; struct nf_conn_counter *counter; acct = nf_conn_acct_find(ct); if (!acct) return; counter = acct->counter; seq_printf(s, "packets=%llu bytes=%llu ", (unsigned long long)atomic64_read(&counter[dir].packets), (unsigned long long)atomic64_read(&counter[dir].bytes)); } /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_tuple_hash *hash = v; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); const struct nf_conntrack_l4proto *l4proto; struct net *net = seq_file_net(s); int ret = 0; WARN_ON(!ct); if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; /* load ->status after refcount increase */ smp_acquire__after_ctrl_dep(); if (nf_ct_should_gc(ct)) { struct ct_iter_state *st = s->private; st->skip_elems--; nf_ct_kill(ct); goto release; } /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) goto release; if (!net_eq(nf_ct_net(ct), net)) goto release; l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u ", l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct), l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l4proto); ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); if (seq_has_overflowed(s)) goto release; seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL); if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) seq_puts(s, "[UNREPLIED] "); print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto); ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); seq_print_acct(s, ct, IP_CT_DIR_REPLY); if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status)) seq_puts(s, "[HW_OFFLOAD] "); else if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) seq_puts(s, "[OFFLOAD] "); else if (test_bit(IPS_ASSURED_BIT, &ct->status)) seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s)) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use)); if (seq_has_overflowed(s)) goto release; ret = 0; release: nf_ct_put(ct); return ret; } static const struct seq_operations ct_seq_ops = { .start = ct_seq_start, .next = ct_seq_next, .stop = ct_seq_stop, .show = ct_seq_show }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; return per_cpu_ptr(net->ct.stat, cpu); } return NULL; } static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; return per_cpu_ptr(net->ct.stat, cpu); } (*pos)++; return NULL; } static void ct_cpu_seq_stop(struct seq_file *seq, void *v) { } static int ct_cpu_seq_show(struct 
seq_file *seq, void *v) { struct net *net = seq_file_net(seq); const struct ip_conntrack_stat *st = v; unsigned int nr_conntracks; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } nr_conntracks = nf_conntrack_count(net); seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, st->clash_resolve, st->found, 0, st->invalid, 0, 0, st->chaintoolong, st->insert, st->insert_failed, st->drop, st->early_drop, st->error, st->expect_new, st->expect_create, st->expect_delete, st->search_restart ); return 0; } static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, .stop = ct_cpu_seq_stop, .show = ct_cpu_seq_show, }; static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; kuid_t root_uid; kgid_t root_gid; pde = proc_create_net("nf_conntrack", 0440, net->proc_net, &ct_seq_ops, sizeof(struct ct_iter_state)); if (!pde) goto out_nf_conntrack; root_uid = make_kuid(net->user_ns, 0); root_gid = make_kgid(net->user_ns, 0); if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(pde, root_uid, root_gid); pde = proc_create_net("nf_conntrack", 0444, net->proc_net_stat, &ct_cpu_seq_ops, sizeof(struct seq_net_private)); if (!pde) goto out_stat_nf_conntrack; return 0; out_stat_nf_conntrack: remove_proc_entry("nf_conntrack", net->proc_net); out_nf_conntrack: return -ENOMEM; } static void nf_conntrack_standalone_fini_proc(struct net *net) { remove_proc_entry("nf_conntrack", net->proc_net_stat); remove_proc_entry("nf_conntrack", net->proc_net); } #else static int nf_conntrack_standalone_init_proc(struct net *net) { return 0; } static void nf_conntrack_standalone_fini_proc(struct net *net) { } #endif /* CONFIG_NF_CONNTRACK_PROCFS */ u32 nf_conntrack_count(const struct net *net) { const struct nf_conntrack_net *cnet = nf_ct_pernet(net); return atomic_read(&cnet->count); } EXPORT_SYMBOL_GPL(nf_conntrack_count); /* Sysctl support */ #ifdef CONFIG_SYSCTL /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; static int nf_conntrack_hash_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; /* module_param hashsize could have changed value */ nf_conntrack_htable_size_user = nf_conntrack_htable_size; ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; /* update ret, we might not be able to satisfy request */ ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user); /* update it to the actual value used by conntrack */ nf_conntrack_htable_size_user = nf_conntrack_htable_size; return ret; } static int nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, i; ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; if (*(u8 *)table->data == 0) return 0; /* Load nf_log_syslog only if no logger is currently registered */ for (i = 0; i < NFPROTO_NUMPROTO; i++) { if (nf_log_is_registered(i)) return 0; } request_module("%s", "nf_log_syslog"); return 0; } static struct ctl_table_header *nf_ct_netfilter_header; enum nf_ct_sysctl_index { NF_SYSCTL_CT_MAX, NF_SYSCTL_CT_COUNT, NF_SYSCTL_CT_BUCKETS, NF_SYSCTL_CT_CHECKSUM, NF_SYSCTL_CT_LOG_INVALID, 
NF_SYSCTL_CT_EXPECT_MAX, NF_SYSCTL_CT_ACCT, #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_SYSCTL_CT_EVENTS, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_SYSCTL_CT_TIMESTAMP, #endif NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST, NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, #endif NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, #ifdef CONFIG_NF_CT_PROTO_SCTP NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, #endif #ifdef CONFIG_NF_CT_PROTO_GRE NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif NF_SYSCTL_CT_LAST_SYSCTL, }; static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, [NF_SYSCTL_CT_COUNT] = { .procname = "nf_conntrack_count", .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, [NF_SYSCTL_CT_BUCKETS] = { .procname = "nf_conntrack_buckets", .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = nf_conntrack_hash_sysctl, }, [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = nf_conntrack_log_invalid_sysctl, }, [NF_SYSCTL_CT_EXPECT_MAX] = { .procname = "nf_conntrack_expect_max", .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, [NF_SYSCTL_CT_ACCT] = { .procname = "nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP [NF_SYSCTL_CT_TIMESTAMP] = { .procname = "nf_conntrack_timestamp", .data = &init_net.ct.sysctl_tstamp, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 
= SYSCTL_ONE, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { .procname = "nf_conntrack_generic_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = { .procname = "nf_conntrack_tcp_timeout_syn_sent", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = { .procname = "nf_conntrack_tcp_timeout_syn_recv", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = { .procname = "nf_conntrack_tcp_timeout_established", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = { .procname = "nf_conntrack_tcp_timeout_fin_wait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = { .procname = "nf_conntrack_tcp_timeout_close_wait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = { .procname = "nf_conntrack_tcp_timeout_last_ack", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = { .procname = "nf_conntrack_tcp_timeout_time_wait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = { .procname = "nf_conntrack_tcp_timeout_close", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = { .procname = "nf_conntrack_tcp_timeout_max_retrans", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = { .procname = "nf_conntrack_tcp_timeout_unacknowledged", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = { .procname = "nf_flowtable_tcp_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #endif [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = { .procname = "nf_conntrack_tcp_ignore_invalid_rst", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = { .procname = "nf_conntrack_udp_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = { .procname = "nf_conntrack_udp_timeout_stream", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { .procname = "nf_flowtable_udp_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, 
.proc_handler = proc_dointvec_jiffies, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { .procname = "nf_conntrack_icmp_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = { .procname = "nf_conntrack_icmpv6_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_NF_CT_PROTO_SCTP [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = { .procname = "nf_conntrack_sctp_timeout_closed", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = { .procname = "nf_conntrack_sctp_timeout_cookie_wait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = { .procname = "nf_conntrack_sctp_timeout_cookie_echoed", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = { .procname = "nf_conntrack_sctp_timeout_established", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .procname = "nf_conntrack_sctp_timeout_shutdown_sent", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .procname = "nf_conntrack_sctp_timeout_shutdown_recd", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = { .procname = "nf_conntrack_gre_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = { .procname = "nf_conntrack_gre_timeout_stream", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, #endif }; static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, }; static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, struct ctl_table *table) { struct nf_tcp_net *tn = nf_tcp_pernet(net); #define XASSIGN(XNAME, tn) \ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \ &(tn)->timeouts[TCP_CONNTRACK_ ## XNAME] XASSIGN(SYN_SENT, tn); XASSIGN(SYN_RECV, tn); XASSIGN(ESTABLISHED, tn); XASSIGN(FIN_WAIT, tn); XASSIGN(CLOSE_WAIT, tn); XASSIGN(LAST_ACK, tn); XASSIGN(TIME_WAIT, tn); XASSIGN(CLOSE, tn); XASSIGN(RETRANS, tn); XASSIGN(UNACK, tn); #undef XASSIGN #define XASSIGN(XNAME, rval) \ table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval) XASSIGN(LOOSE, &tn->tcp_loose); XASSIGN(LIBERAL, &tn->tcp_be_liberal); XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst); #undef XASSIGN #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; #endif } static void 
nf_conntrack_standalone_init_sctp_sysctl(struct net *net, struct ctl_table *table) { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net *sn = nf_sctp_pernet(net); #define XASSIGN(XNAME, sn) \ table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \ &(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME] XASSIGN(CLOSED, sn); XASSIGN(COOKIE_WAIT, sn); XASSIGN(COOKIE_ECHOED, sn); XASSIGN(ESTABLISHED, sn); XASSIGN(SHUTDOWN_SENT, sn); XASSIGN(SHUTDOWN_RECD, sn); XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); #undef XASSIGN #endif } static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, struct ctl_table *table) { #ifdef CONFIG_NF_CT_PROTO_GRE struct nf_gre_net *gn = nf_gre_pernet(net); table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED]; table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED]; #endif } static int nf_conntrack_standalone_init_sysctl(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); struct nf_udp_net *un = nf_udp_pernet(net); struct ctl_table *table; BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL); table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) return -ENOMEM; table[NF_SYSCTL_CT_COUNT].data = &cnet->count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct; #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp; #endif table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED]; table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; #endif nf_conntrack_standalone_init_tcp_sysctl(net, table); nf_conntrack_standalone_init_sctp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); /* Don't allow non-init_net ns to alter global sysctls */ if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_MAX].mode = 0444; table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444; table[NF_SYSCTL_CT_BUCKETS].mode = 0444; } cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter", table, ARRAY_SIZE(nf_ct_sysctl_table)); if (!cnet->sysctl_header) goto out_unregister_netfilter; return 0; out_unregister_netfilter: kfree(table); return -ENOMEM; } static void nf_conntrack_standalone_fini_sysctl(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); const struct ctl_table *table; table = cnet->sysctl_header->ctl_table_arg; unregister_net_sysctl_table(cnet->sysctl_header); kfree(table); } #else static int nf_conntrack_standalone_init_sysctl(struct net *net) { return 0; } static void nf_conntrack_standalone_fini_sysctl(struct net *net) { } #endif /* CONFIG_SYSCTL */ static void nf_conntrack_fini_net(struct net *net) { if (enable_hooks) nf_ct_netns_put(net, NFPROTO_INET); nf_conntrack_standalone_fini_proc(net); nf_conntrack_standalone_fini_sysctl(net); } static int nf_conntrack_pernet_init(struct net *net) { int ret; net->ct.sysctl_checksum = 1; ret = 
nf_conntrack_standalone_init_sysctl(net); if (ret < 0) return ret; ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; ret = nf_conntrack_init_net(net); if (ret < 0) goto out_init_net; if (enable_hooks) { ret = nf_ct_netns_get(net, NFPROTO_INET); if (ret < 0) goto out_hooks; } return 0; out_hooks: nf_conntrack_cleanup_net(net); out_init_net: nf_conntrack_standalone_fini_proc(net); out_proc: nf_conntrack_standalone_fini_sysctl(net); return ret; } static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) nf_conntrack_fini_net(net); nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { .init = nf_conntrack_pernet_init, .exit_batch = nf_conntrack_pernet_exit, .id = &nf_conntrack_net_id, .size = sizeof(struct nf_conntrack_net), }; static int __init nf_conntrack_standalone_init(void) { int ret = nf_conntrack_init_start(); if (ret < 0) goto out_start; BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); #ifdef CONFIG_SYSCTL nf_ct_netfilter_header = register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); if (!nf_ct_netfilter_header) { pr_err("nf_conntrack: can't register to sysctl.\n"); ret = -ENOMEM; goto out_sysctl; } nf_conntrack_htable_size_user = nf_conntrack_htable_size; #endif nf_conntrack_init_end(); ret = register_pernet_subsys(&nf_conntrack_net_ops); if (ret < 0) goto out_pernet; return 0; out_pernet: #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(nf_ct_netfilter_header); out_sysctl: #endif nf_conntrack_cleanup_end(); out_start: return ret; } static void __exit nf_conntrack_standalone_fini(void) { nf_conntrack_cleanup_start(); unregister_pernet_subsys(&nf_conntrack_net_ops); #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(nf_ct_netfilter_header); #endif nf_conntrack_cleanup_end(); } module_init(nf_conntrack_standalone_init); module_exit(nf_conntrack_standalone_fini);
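The pernet_operations registration at the end of this file is the standard way to give a subsystem per-network-namespace state: with .id and .size set, the core allocates the structure for every namespace and nf_ct_pernet()/net_generic() retrieve it. A minimal sketch of that pattern follows, with hypothetical demo_* names; conntrack uses .exit_batch to tear down many namespaces per RCU cycle, while the simpler .exit is shown here.

#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct demo_net {
	unsigned int counter;	/* per-namespace state */
};

static unsigned int demo_net_id __read_mostly;

static int demo_pernet_init(struct net *net)
{
	struct demo_net *dn = net_generic(net, demo_net_id);

	dn->counter = 0;	/* runs for every namespace, incl. init_net */
	return 0;
}

static void demo_pernet_exit(struct net *net)
{
	/* nothing to free: the .size allocation is owned by the core */
}

static struct pernet_operations demo_net_ops = {
	.init = demo_pernet_init,
	.exit = demo_pernet_exit,
	.id   = &demo_net_id,
	.size = sizeof(struct demo_net),
};

static int __init demo_init(void)
{
	return register_pernet_subsys(&demo_net_ops);
}

static void __exit demo_exit(void)
{
	unregister_pernet_subsys(&demo_net_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");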
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2014 Nicira, Inc. */ #include "flow.h" #include "datapath.h" #include "flow_netlink.h" #include <linux/uaccess.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <net/llc_pdu.h> #include <linux/kernel.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/llc.h> #include <linux/module.h> #include <linux/in.h> #include <linux/rcupdate.h> #include <linux/cpumask.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/sctp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/rculist.h> #include <linux/sort.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> #define TBL_MIN_BUCKETS 1024 #define MASK_ARRAY_SIZE_MIN 16 #define REHASH_INTERVAL (10 * 60 * HZ) #define MC_DEFAULT_HASH_ENTRIES 256 #define MC_HASH_SHIFT 8 #define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) static struct kmem_cache *flow_cache; struct kmem_cache *flow_stats_cache __read_mostly; static u16 range_n_bytes(const struct sw_flow_key_range *range) { return range->end - range->start; } void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask) { int start = full ? 0 : mask->range.start; int len = full ? sizeof *dst : range_n_bytes(&mask->range); const long *m = (const long *)((const u8 *)&mask->key + start); const long *s = (const long *)((const u8 *)src + start); long *d = (long *)((u8 *)dst + start); int i; /* If 'full' is true then all of 'dst' is fully initialized. Otherwise, * if 'full' is false the memory outside of the 'mask->range' is left * uninitialized. This can be used as an optimization when further * operations on 'dst' only use contents within 'mask->range'.
*/ for (i = 0; i < len; i += sizeof(long)) *d++ = *s++ & *m++; } struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; struct sw_flow_stats *stats; flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); flow->stats_last_writer = -1; flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids]; /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, node_online(0) ? 0 : NUMA_NO_NODE); if (!stats) goto err; spin_lock_init(&stats->lock); RCU_INIT_POINTER(flow->stats[0], stats); cpumask_set_cpu(0, flow->cpu_used_mask); return flow; err: kmem_cache_free(flow_cache, flow); return ERR_PTR(-ENOMEM); } int ovs_flow_tbl_count(const struct flow_table *table) { return table->count; } static void flow_free(struct sw_flow *flow) { unsigned int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *) flow->sf_acts); for_each_cpu(cpu, flow->cpu_used_mask) { if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct sw_flow_stats __force *)flow->stats[cpu]); } kmem_cache_free(flow_cache, flow); } static void rcu_free_flow_callback(struct rcu_head *rcu) { struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); flow_free(flow); } void ovs_flow_free(struct sw_flow *flow, bool deferred) { if (!flow) return; if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else flow_free(flow); } static void __table_instance_destroy(struct table_instance *ti) { kvfree(ti->buckets); kfree(ti); } static struct table_instance *table_instance_alloc(int new_size) { struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); int i; if (!ti) return NULL; ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head), GFP_KERNEL); if (!ti->buckets) { kfree(ti); return NULL; } for (i = 0; i < new_size; i++) INIT_HLIST_HEAD(&ti->buckets[i]); ti->n_buckets = new_size; ti->node_ver = 0; get_random_bytes(&ti->hash_seed, sizeof(u32)); return ti; } static void __mask_array_destroy(struct mask_array *ma) { free_percpu(ma->masks_usage_stats); kfree(ma); } static void mask_array_rcu_cb(struct rcu_head *rcu) { struct mask_array *ma = container_of(rcu, struct mask_array, rcu); __mask_array_destroy(ma); } static void tbl_mask_array_reset_counters(struct mask_array *ma) { int i, cpu; /* As the per CPU counters are not atomic we can not go ahead and * reset them from another CPU. To be able to still have an approximate * zero based counter we store the value at reset, and subtract it * later when processing. 
*/ for (i = 0; i < ma->max; i++) { ma->masks_usage_zero_cntr[i] = 0; for_each_possible_cpu(cpu) { struct mask_array_stats *stats; unsigned int start; u64 counter; stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { start = u64_stats_fetch_begin(&stats->syncp); counter = stats->usage_cntrs[i]; } while (u64_stats_fetch_retry(&stats->syncp, start)); ma->masks_usage_zero_cntr[i] += counter; } } } static struct mask_array *tbl_mask_array_alloc(int size) { struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); new = kzalloc(struct_size(new, masks, size) + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; new->masks_usage_zero_cntr = (u64 *)((u8 *)new + struct_size(new, masks, size)); new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + sizeof(u64) * size, __alignof__(u64)); if (!new->masks_usage_stats) { kfree(new); return NULL; } new->count = 0; new->max = size; return new; } static int tbl_mask_array_realloc(struct flow_table *tbl, int size) { struct mask_array *old; struct mask_array *new; new = tbl_mask_array_alloc(size); if (!new) return -ENOMEM; old = ovsl_dereference(tbl->mask_array); if (old) { int i; for (i = 0; i < old->max; i++) { if (ovsl_dereference(old->masks[i])) new->masks[new->count++] = old->masks[i]; } call_rcu(&old->rcu, mask_array_rcu_cb); } rcu_assign_pointer(tbl->mask_array, new); return 0; } static int tbl_mask_array_add_mask(struct flow_table *tbl, struct sw_flow_mask *new) { struct mask_array *ma = ovsl_dereference(tbl->mask_array); int err, ma_count = READ_ONCE(ma->count); if (ma_count >= ma->max) { err = tbl_mask_array_realloc(tbl, ma->max + MASK_ARRAY_SIZE_MIN); if (err) return err; ma = ovsl_dereference(tbl->mask_array); } else { /* On every add or delete we need to reset the counters so * every new mask gets a fair chance of being prioritized. */ tbl_mask_array_reset_counters(ma); } BUG_ON(ovsl_dereference(ma->masks[ma_count])); rcu_assign_pointer(ma->masks[ma_count], new); WRITE_ONCE(ma->count, ma_count + 1); return 0; } static void tbl_mask_array_del_mask(struct flow_table *tbl, struct sw_flow_mask *mask) { struct mask_array *ma = ovsl_dereference(tbl->mask_array); int i, ma_count = READ_ONCE(ma->count); /* Remove the deleted mask pointers from the array */ for (i = 0; i < ma_count; i++) { if (mask == ovsl_dereference(ma->masks[i])) goto found; } BUG(); return; found: WRITE_ONCE(ma->count, ma_count - 1); rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]); RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL); kfree_rcu(mask, rcu); /* Shrink the mask array if necessary. */ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && ma_count <= (ma->max / 3)) tbl_mask_array_realloc(tbl, ma->max / 2); else tbl_mask_array_reset_counters(ma); } /* Remove 'mask' from the mask list, if it is not needed any more. */ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) { if (mask) { /* ovs-lock is required to protect mask-refcount and * mask list. 
*/ ASSERT_OVSL(); BUG_ON(!mask->ref_count); mask->ref_count--; if (!mask->ref_count) tbl_mask_array_del_mask(tbl, mask); } } static void __mask_cache_destroy(struct mask_cache *mc) { free_percpu(mc->mask_cache); kfree(mc); } static void mask_cache_rcu_cb(struct rcu_head *rcu) { struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu); __mask_cache_destroy(mc); } static struct mask_cache *tbl_mask_cache_alloc(u32 size) { struct mask_cache_entry __percpu *cache = NULL; struct mask_cache *new; /* Only allow size to be 0, or a power of 2, and does not exceed * percpu allocation size. */ if ((!is_power_of_2(size) && size != 0) || (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) return NULL; new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; new->cache_size = size; if (new->cache_size > 0) { cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry), new->cache_size), __alignof__(struct mask_cache_entry)); if (!cache) { kfree(new); return NULL; } } new->mask_cache = cache; return new; } int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) { struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); struct mask_cache *new; if (size == mc->cache_size) return 0; if ((!is_power_of_2(size) && size != 0) || (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) return -EINVAL; new = tbl_mask_cache_alloc(size); if (!new) return -ENOMEM; rcu_assign_pointer(table->mask_cache, new); call_rcu(&mc->rcu, mask_cache_rcu_cb); return 0; } int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; struct mask_cache *mc; struct mask_array *ma; mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES); if (!mc) return -ENOMEM; ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); if (!ma) goto free_mask_cache; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) goto free_mask_array; ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ufid_ti) goto free_ti; rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ufid_ti, ufid_ti); rcu_assign_pointer(table->mask_array, ma); rcu_assign_pointer(table->mask_cache, mc); table->last_rehash = jiffies; table->count = 0; table->ufid_count = 0; return 0; free_ti: __table_instance_destroy(ti); free_mask_array: __mask_array_destroy(ma); free_mask_cache: __mask_cache_destroy(mc); return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) { struct table_instance *ti; ti = container_of(rcu, struct table_instance, rcu); __table_instance_destroy(ti); } static void table_instance_flow_free(struct flow_table *table, struct table_instance *ti, struct table_instance *ufid_ti, struct sw_flow *flow) { hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; if (ovs_identifier_is_ufid(&flow->id)) { hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); table->ufid_count--; } flow_mask_remove(table, flow->mask); } /* Must be called with OVS mutex held. 
*/ void table_instance_flow_flush(struct flow_table *table, struct table_instance *ti, struct table_instance *ufid_ti) { int i; for (i = 0; i < ti->n_buckets; i++) { struct hlist_head *head = &ti->buckets[i]; struct hlist_node *n; struct sw_flow *flow; hlist_for_each_entry_safe(flow, n, head, flow_table.node[ti->node_ver]) { table_instance_flow_free(table, ti, ufid_ti, flow); ovs_flow_free(flow, true); } } if (WARN_ON(table->count != 0 || table->ufid_count != 0)) { table->count = 0; table->ufid_count = 0; } } static void table_instance_destroy(struct table_instance *ti, struct table_instance *ufid_ti) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); } /* No need for locking; this function is called from an RCU callback or * from the error path. */ void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); struct mask_cache *mc = rcu_dereference_raw(table->mask_cache); struct mask_array *ma = rcu_dereference_raw(table->mask_array); call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); table_instance_destroy(ti, ufid_ti); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, u32 *bucket, u32 *last) { struct sw_flow *flow; struct hlist_head *head; int ver; int i; ver = ti->node_ver; while (*bucket < ti->n_buckets) { i = 0; head = &ti->buckets[*bucket]; hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; } *last = i + 1; return flow; } (*bucket)++; *last = 0; } return NULL; } static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) { hash = jhash_1word(hash, ti->hash_seed); return &ti->buckets[hash & (ti->n_buckets - 1)]; } static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) { struct hlist_head *head; head = find_bucket(ti, flow->flow_table.hash); hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); } static void ufid_table_instance_insert(struct table_instance *ti, struct sw_flow *flow) { struct hlist_head *head; head = find_bucket(ti, flow->ufid_table.hash); hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, struct table_instance *new, bool ufid) { int old_ver; int i; old_ver = old->node_ver; new->node_ver = !old_ver; /* Insert into the new table.
static void flow_table_copy_flows(struct table_instance *old,
				  struct table_instance *new, bool ufid)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table.
	 */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = &old->buckets[i];

		if (ufid)
			hlist_for_each_entry_rcu(flow, head,
						 ufid_table.node[old_ver],
						 lockdep_ovsl_is_held())
				ufid_table_instance_insert(new, flow);
		else
			hlist_for_each_entry_rcu(flow, head,
						 flow_table.node[old_ver],
						 lockdep_ovsl_is_held())
				table_instance_insert(new, flow);
	}
}

static struct table_instance *table_instance_rehash(struct table_instance *ti,
						    int n_buckets, bool ufid)
{
	struct table_instance *new_ti;

	new_ti = table_instance_alloc(n_buckets);
	if (!new_ti)
		return NULL;

	flow_table_copy_flows(ti, new_ti, ufid);

	return new_ti;
}

int ovs_flow_tbl_flush(struct flow_table *flow_table)
{
	struct table_instance *old_ti, *new_ti;
	struct table_instance *old_ufid_ti, *new_ufid_ti;

	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ti)
		return -ENOMEM;
	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
	if (!new_ufid_ti)
		goto err_free_ti;

	old_ti = ovsl_dereference(flow_table->ti);
	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);

	rcu_assign_pointer(flow_table->ti, new_ti);
	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
	flow_table->last_rehash = jiffies;

	table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
	table_instance_destroy(old_ti, old_ufid_ti);
	return 0;

err_free_ti:
	__table_instance_destroy(new_ti);
	return -ENOMEM;
}

static u32 flow_hash(const struct sw_flow_key *key,
		     const struct sw_flow_key_range *range)
{
	const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);

	/* Make sure the number of hash bytes is a multiple of u32. */
	int hash_u32s = range_n_bytes(range) >> 2;

	return jhash2(hash_key, hash_u32s, 0);
}

static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_proto)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
				 sizeof(long));
}

static bool cmp_key(const struct sw_flow_key *key1,
		    const struct sw_flow_key *key2,
		    int key_start, int key_end)
{
	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
	int i;

	for (i = key_start; i < key_end; i += sizeof(long))
		if (*cp1++ ^ *cp2++)
			return false;

	return true;
}

static bool flow_cmp_masked_key(const struct sw_flow *flow,
				const struct sw_flow_key *key,
				const struct sw_flow_key_range *range)
{
	return cmp_key(&flow->key, key, range->start, range->end);
}

static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
				      const struct sw_flow_match *match)
{
	struct sw_flow_key *key = match->key;
	int key_start = flow_key_start(key);
	int key_end = match->range.end;

	BUG_ON(ovs_identifier_is_ufid(&flow->id));
	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
}

static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
					  const struct sw_flow_key *unmasked,
					  const struct sw_flow_mask *mask,
					  u32 *n_mask_hit)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
	hash = flow_hash(&masked_key, &mask->range);
	head = find_bucket(ti, hash);
	(*n_mask_hit)++;

	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],
				 lockdep_ovsl_is_held()) {
		if (flow->mask == mask && flow->flow_table.hash == hash &&
		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
			return flow;
	}
	return NULL;
}
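/*
 * Illustrative sketch, not part of the original file: flow_hash() above
 * hashes only the u32 words inside [range->start, range->end). A
 * hypothetical convenience wrapper that hashes the entire key would
 * build a range covering the whole structure; sizeof(struct sw_flow_key)
 * is a multiple of sizeof(long) (see the BUILD_BUG_ON in ovs_flow_init()
 * at the end of this file), so the u32-multiple requirement holds.
 */
static inline u32 flow_hash_full_key(const struct sw_flow_key *key)
{
	struct sw_flow_key_range range = {
		.start = 0,
		.end = sizeof(struct sw_flow_key),
	};

	return flow_hash(key, &range);
}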
/* Flow lookup does a full lookup on the flow table. It starts with the
 * mask at the index passed in via *index.
 * This function MUST be called with BH disabled due to the use
 * of CPU-specific variables.
 */
static struct sw_flow *flow_lookup(struct flow_table *tbl,
				   struct table_instance *ti,
				   struct mask_array *ma,
				   const struct sw_flow_key *key,
				   u32 *n_mask_hit, u32 *n_cache_hit,
				   u32 *index)
{
	struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
	struct sw_flow *flow;
	struct sw_flow_mask *mask;
	int i;

	if (likely(*index < ma->max)) {
		mask = rcu_dereference_ovsl(ma->masks[*index]);
		if (mask) {
			flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
			if (flow) {
				u64_stats_update_begin(&stats->syncp);
				stats->usage_cntrs[*index]++;
				u64_stats_update_end(&stats->syncp);
				(*n_cache_hit)++;
				return flow;
			}
		}
	}

	for (i = 0; i < ma->max; i++) {
		if (i == *index)
			continue;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (unlikely(!mask))
			break;

		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
		if (flow) { /* Found */
			*index = i;
			u64_stats_update_begin(&stats->syncp);
			stats->usage_cntrs[*index]++;
			u64_stats_update_end(&stats->syncp);
			return flow;
		}
	}

	return NULL;
}

/*
 * mask_cache maps a flow to a probable mask. This cache is not tightly
 * coupled: updates to the mask list can leave inconsistent entries behind
 * in the mask cache.
 * This is a per-CPU cache divided into MC_HASH_SEGS segments.
 * On a hash collision the entry is hashed into the next segment.
 */
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
					  const struct sw_flow_key *key,
					  u32 skb_hash,
					  u32 *n_mask_hit,
					  u32 *n_cache_hit)
{
	struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
	struct mask_array *ma = rcu_dereference(tbl->mask_array);
	struct table_instance *ti = rcu_dereference(tbl->ti);
	struct mask_cache_entry *entries, *ce;
	struct sw_flow *flow;
	u32 hash;
	int seg;

	*n_mask_hit = 0;
	*n_cache_hit = 0;
	if (unlikely(!skb_hash || mc->cache_size == 0)) {
		u32 mask_index = 0;
		u32 cache = 0;

		return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache,
				   &mask_index);
	}

	/* Pre- and post-recirculation flows usually have the same skb_hash
	 * value. To avoid hash collisions, rehash the 'skb_hash' with
	 * 'recirc_id'.
	 */
	if (key->recirc_id)
		skb_hash = jhash_1word(skb_hash, key->recirc_id);

	ce = NULL;
	hash = skb_hash;
	entries = this_cpu_ptr(mc->mask_cache);

	/* Find the cache entry 'ce' to operate on. */
	for (seg = 0; seg < MC_HASH_SEGS; seg++) {
		int index = hash & (mc->cache_size - 1);
		struct mask_cache_entry *e;

		e = &entries[index];
		if (e->skb_hash == skb_hash) {
			flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
					   n_cache_hit, &e->mask_index);
			if (!flow)
				e->skb_hash = 0;

			return flow;
		}

		if (!ce || e->skb_hash < ce->skb_hash)
			ce = e; /* A better replacement cache candidate. */

		hash >>= MC_HASH_SHIFT;
	}

	/* Cache miss, do full lookup. */
	flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit,
			   &ce->mask_index);
	if (flow)
		ce->skb_hash = skb_hash;

	*n_cache_hit = 0;
	return flow;
}
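/*
 * Illustrative sketch, not part of the original file: the probe sequence
 * used by ovs_flow_tbl_lookup_stats() above, written as a hypothetical
 * helper. Each segment consumes MC_HASH_SHIFT bits of the skb_hash, so
 * for example with MC_HASH_SHIFT == 8 and a 256-entry cache, each packet
 * can probe up to MC_HASH_SEGS distinct slots before falling back to a
 * full lookup (the exact constants live elsewhere in this file).
 */
static inline u32 mask_cache_probe_slot(u32 skb_hash, u32 cache_size, int seg)
{
	return (skb_hash >> (seg * MC_HASH_SHIFT)) & (cache_size - 1);
}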
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
				    const struct sw_flow_key *key)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
	struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
	u32 __always_unused n_mask_hit;
	u32 __always_unused n_cache_hit;
	struct sw_flow *flow;
	u32 index = 0;

	/* This function gets called through the netlink interface and
	 * therefore is preemptible. However, flow_lookup() needs to be
	 * called with BH disabled due to CPU-specific variables.
	 */
	local_bh_disable();
	flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit,
			   &index);
	local_bh_enable();
	return flow;
}

struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
					  const struct sw_flow_match *match)
{
	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
	int i;

	/* Always called under ovs-mutex. */
	for (i = 0; i < ma->max; i++) {
		struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
		u32 __always_unused n_mask_hit;
		struct sw_flow_mask *mask;
		struct sw_flow *flow;

		mask = ovsl_dereference(ma->masks[i]);
		if (!mask)
			continue;

		flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
		if (flow && ovs_identifier_is_key(&flow->id) &&
		    ovs_flow_cmp_unmasked_key(flow, match)) {
			return flow;
		}
	}

	return NULL;
}

static u32 ufid_hash(const struct sw_flow_id *sfid)
{
	return jhash(sfid->ufid, sfid->ufid_len, 0);
}

static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
			      const struct sw_flow_id *sfid)
{
	if (flow->id.ufid_len != sfid->ufid_len)
		return false;

	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
}

bool ovs_flow_cmp(const struct sw_flow *flow,
		  const struct sw_flow_match *match)
{
	if (ovs_identifier_is_ufid(&flow->id))
		return flow_cmp_masked_key(flow, match->key, &match->range);

	return ovs_flow_cmp_unmasked_key(flow, match);
}

struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
					 const struct sw_flow_id *ufid)
{
	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
	struct sw_flow *flow;
	struct hlist_head *head;
	u32 hash;

	hash = ufid_hash(ufid);
	head = find_bucket(ti, hash);
	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],
				 lockdep_ovsl_is_held()) {
		if (flow->ufid_table.hash == hash &&
		    ovs_flow_cmp_ufid(flow, ufid))
			return flow;
	}
	return NULL;
}

int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);

	return READ_ONCE(ma->count);
}

u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
{
	struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);

	return READ_ONCE(mc->cache_size);
}

static struct table_instance *table_instance_expand(struct table_instance *ti,
						    bool ufid)
{
	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}

/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti = ovsl_dereference(table->ti);
	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);

	BUG_ON(table->count == 0);
	table_instance_flow_free(table, ti, ufid_ti, flow);
}

static struct sw_flow_mask *mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 1;

	return mask;
}

static bool mask_equal(const struct sw_flow_mask *a,
		       const struct sw_flow_mask *b)
{
	const u8 *a_ = (const u8 *)&a->key + a->range.start;
	const u8 *b_ = (const u8 *)&b->key + b->range.start;

	return (a->range.end == b->range.end) &&
	       (a->range.start == b->range.start) &&
	       (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct mask_array *ma;
	int i;

	ma = ovsl_dereference(tbl->mask_array);
	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *t;

		t = ovsl_dereference(ma->masks[i]);
		if (t && mask_equal(mask, t))
			return t;
	}

	return NULL;
}
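/*
 * Illustrative trace, not part of the original file: the mask refcount
 * life cycle implied by flow_mask_insert() below and flow_mask_remove()
 * earlier in this file. Each flow holds one reference on its (shared)
 * mask; the mask leaves the mask array only when the last flow using it
 * goes away.
 *
 *   flow_mask_insert(tbl, f1, m)  -> ref_count = 1 (mask added to array)
 *   flow_mask_insert(tbl, f2, m)  -> ref_count = 2 (existing mask reused)
 *   flow_mask_remove(tbl, m)      -> ref_count = 1
 *   flow_mask_remove(tbl, m)      -> ref_count = 0, tbl_mask_array_del_mask()
 */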
/* Add 'mask' into the mask list, if it is not already there.
 */
static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
			    const struct sw_flow_mask *new)
{
	struct sw_flow_mask *mask;

	mask = flow_mask_find(tbl, new);
	if (!mask) {
		/* Allocate a new mask if none exists. */
		mask = mask_alloc();
		if (!mask)
			return -ENOMEM;

		mask->key = new->key;
		mask->range = new->range;

		/* Add mask to mask-list. */
		if (tbl_mask_array_add_mask(tbl, mask)) {
			kfree(mask);
			return -ENOMEM;
		}
	} else {
		BUG_ON(!mask->ref_count);
		mask->ref_count++;
	}

	flow->mask = mask;
	return 0;
}

/* Must be called with OVS mutex held. */
static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *new_ti = NULL;
	struct table_instance *ti;

	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
	ti = ovsl_dereference(table->ti);
	table_instance_insert(ti, flow);
	table->count++;

	/* Expand table, if necessary, to make room. */
	if (table->count > ti->n_buckets)
		new_ti = table_instance_expand(ti, false);
	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
		new_ti = table_instance_rehash(ti, ti->n_buckets, false);

	if (new_ti) {
		rcu_assign_pointer(table->ti, new_ti);
		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		table->last_rehash = jiffies;
	}
}

/* Must be called with OVS mutex held. */
static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct table_instance *ti;

	flow->ufid_table.hash = ufid_hash(&flow->id);
	ti = ovsl_dereference(table->ufid_ti);
	ufid_table_instance_insert(ti, flow);
	table->ufid_count++;

	/* Expand table, if necessary, to make room. */
	if (table->ufid_count > ti->n_buckets) {
		struct table_instance *new_ti;

		new_ti = table_instance_expand(ti, true);
		if (new_ti) {
			rcu_assign_pointer(table->ufid_ti, new_ti);
			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
		}
	}
}

/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
			const struct sw_flow_mask *mask)
{
	int err;

	err = flow_mask_insert(table, flow, mask);
	if (err)
		return err;
	flow_key_insert(table, flow);
	if (ovs_identifier_is_ufid(&flow->id))
		flow_ufid_insert(table, flow);

	return 0;
}

static int compare_mask_and_count(const void *a, const void *b)
{
	const struct mask_count *mc_a = a;
	const struct mask_count *mc_b = b;

	return (s64)mc_b->counter - (s64)mc_a->counter;
}
/* Must be called with OVS mutex held. */
void ovs_flow_masks_rebalance(struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
	struct mask_count *masks_and_count;
	struct mask_array *new;
	int masks_entries = 0;
	int i;

	/* Build array of all current entries with use counters. */
	masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
					GFP_KERNEL);
	if (!masks_and_count)
		return;

	for (i = 0; i < ma->max; i++) {
		struct sw_flow_mask *mask;
		int cpu;

		mask = rcu_dereference_ovsl(ma->masks[i]);
		if (unlikely(!mask))
			break;

		masks_and_count[i].index = i;
		masks_and_count[i].counter = 0;

		for_each_possible_cpu(cpu) {
			struct mask_array_stats *stats;
			unsigned int start;
			u64 counter;

			stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
			do {
				start = u64_stats_fetch_begin(&stats->syncp);
				counter = stats->usage_cntrs[i];
			} while (u64_stats_fetch_retry(&stats->syncp, start));

			masks_and_count[i].counter += counter;
		}

		/* Subtract the zero count value. */
		masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];

		/* Rather than calling tbl_mask_array_reset_counters()
		 * below when no change is needed, do it inline here.
		 */
		ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
	}

	if (i == 0)
		goto free_mask_entries;

	/* Sort the entries */
	masks_entries = i;
	sort(masks_and_count, masks_entries, sizeof(*masks_and_count),
	     compare_mask_and_count, NULL);

	/* If the order is the same, nothing to do... */
	for (i = 0; i < masks_entries; i++) {
		if (i != masks_and_count[i].index)
			break;
	}
	if (i == masks_entries)
		goto free_mask_entries;

	/* Rebuild the new list in order of usage. */
	new = tbl_mask_array_alloc(ma->max);
	if (!new)
		goto free_mask_entries;

	for (i = 0; i < masks_entries; i++) {
		int index = masks_and_count[i].index;

		if (ovsl_dereference(ma->masks[index]))
			new->masks[new->count++] = ma->masks[index];
	}

	rcu_assign_pointer(table->mask_array, new);
	call_rcu(&ma->rcu, mask_array_rcu_cb);

free_mask_entries:
	kfree(masks_and_count);
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code.
 */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
				       + (nr_cpu_ids
					  * sizeof(struct sw_flow_stats *))
				       + cpumask_size(),
				       0, 0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	flow_stats_cache
		= kmem_cache_create("sw_flow_stats",
				    sizeof(struct sw_flow_stats), 0,
				    SLAB_HWCACHE_ALIGN, NULL);
	if (flow_stats_cache == NULL) {
		kmem_cache_destroy(flow_cache);
		flow_cache = NULL;
		return -ENOMEM;
	}

	return 0;
}

/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_stats_cache);
	kmem_cache_destroy(flow_cache);
}
// SPDX-License-Identifier: GPL-2.0
/*
 *  fs/partitions/osf.c
 *
 *  Code extracted from drivers/block/genhd.c
 *
 *  Copyright (C) 1991-1998  Linus Torvalds
 *  Re-organised Feb 1998 Russell King
 */

#include "check.h"

#define MAX_OSF_PARTITIONS 18
#define DISKLABELMAGIC (0x82564557UL)

int osf_partition(struct parsed_partitions *state)
{
	int i;
	int slot = 1;
	unsigned int npartitions;
	Sector sect;
	unsigned char *data;
	struct disklabel {
		__le32 d_magic;
		__le16 d_type, d_subtype;
		u8 d_typename[16];
		u8 d_packname[16];
		__le32 d_secsize;
		__le32 d_nsectors;
		__le32 d_ntracks;
		__le32 d_ncylinders;
		__le32 d_secpercyl;
		__le32 d_secprtunit;
		__le16 d_sparespertrack;
		__le16 d_sparespercyl;
		__le32 d_acylinders;
		__le16 d_rpm, d_interleave, d_trackskew, d_cylskew;
		__le32 d_headswitch, d_trkseek, d_flags;
		__le32 d_drivedata[5];
		__le32 d_spare[5];
		__le32 d_magic2;
		__le16 d_checksum;
		__le16 d_npartitions;
		__le32 d_bbsize, d_sbsize;
		struct d_partition {
			__le32 p_size;
			__le32 p_offset;
			__le32 p_fsize;
			u8  p_fstype;
			u8  p_frag;
			__le16 p_cpg;
		} d_partitions[MAX_OSF_PARTITIONS];
	} *label;
	struct d_partition *partition;

	data = read_part_sector(state, 0, &sect);
	if (!data)
		return -1;

	label = (struct disklabel *) (data + 64);
	partition = label->d_partitions;
	if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) {
		put_dev_sector(sect);
		return 0;
	}
	if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) {
		put_dev_sector(sect);
		return 0;
	}
	npartitions = le16_to_cpu(label->d_npartitions);
	if (npartitions > MAX_OSF_PARTITIONS) {
		put_dev_sector(sect);
		return 0;
	}
	for (i = 0; i < npartitions; i++, partition++) {
		if (slot == state->limit)
			break;
		if (le32_to_cpu(partition->p_size))
			put_partition(state, slot,
				      le32_to_cpu(partition->p_offset),
				      le32_to_cpu(partition->p_size));
		slot++;
	}
	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	put_dev_sector(sect);
	return 1;
}
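/*
 * Worked example (comment only, not part of the original file): an
 * OSF/Tru64 disklabel lives at byte offset 64 of sector 0. With
 * DISKLABELMAGIC == 0x82564557 stored little-endian on disk, a valid
 * label begins with the byte sequence 57 45 56 82, and the same magic is
 * repeated in d_magic2 as a consistency check. osf_partition() above
 * rejects the disk unless both magics match and d_npartitions does not
 * exceed MAX_OSF_PARTITIONS (18), then registers every partition with a
 * non-zero p_size.
 */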
/*
 * Copyright (c) 2001 The Regents of the University of Michigan.
 * All rights reserved.
 *
 * Kendrick Smith <kmsmith@umich.edu>
 * Andy Adamson <kandros@umich.edu>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
#include <linux/string_helpers.h>
#include <linux/fsnotify.h>
#include <linux/rhashtable.h>
#include <linux/nfs_ssc.h>

#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
#include "current_stateid.h"
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_PROC

#define all_ones {{ ~0, ~0}, ~0}
static const stateid_t one_stateid = {
	.si_generation = ~0,
	.si_opaque = all_ones,
};
static const stateid_t zero_stateid = {
	/* all fields zero */
};
static const stateid_t currentstateid = {
	.si_generation = 1,
};
static const stateid_t close_stateid = {
	.si_generation = 0xffffffffU,
};

static u64 current_sessionid = 1;

#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
#define CLOSE_STATEID(stateid)  (!memcmp((stateid), &close_stateid, sizeof(stateid_t)))

/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
static void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
static void nfsd4_file_hash_remove(struct nfs4_file *fi);
static void deleg_reaper(struct nfsd_net *nn);

/* Locking: */

/*
 * Currently used for the del_recall_lru and file hash table.  In an
 * effort to decrease the scope of the client_mutex, this spinlock may
 * eventually cover more:
 */
static DEFINE_SPINLOCK(state_lock);

enum nfsd4_st_mutex_lock_subclass {
	OPEN_STATEID_MUTEX = 0,
	LOCK_STATEID_MUTEX = 1,
};

/*
 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
 * the refcount on the open stateid to drop.
 */
static DECLARE_WAIT_QUEUE_HEAD(close_wq);

/*
 * A waitqueue where a writer to clients/#/ctl destroying a client can
 * wait for cl_rpc_users to drop to 0 and then for the client to be
 * unhashed.
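 *
 * (Editorial note: the waiters here are woken from put_client_renew() and
 *  put_client_renew_locked() below, once an expired client's cl_rpc_users
 *  count has dropped to zero.)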
*/ static DECLARE_WAIT_QUEUE_HEAD(expiry_wq); static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops; static struct workqueue_struct *laundry_wq; int nfsd4_create_laundry_wq(void) { int rc = 0; laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) rc = -ENOMEM; return rc; } void nfsd4_destroy_laundry_wq(void) { destroy_workqueue(laundry_wq); } static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_dead; } static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; ses->se_dead = true; return nfs_ok; } static bool is_client_expired(struct nfs4_client *clp) { return clp->cl_time == 0; } static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn, struct nfs4_client *clp) { if (clp->cl_state != NFSD4_ACTIVE) atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); } static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } /* must be called under the client_lock */ static inline void renew_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (is_client_expired(clp)) { WARN_ON(1); printk("%s: client (clientid %08x/%08x) already expired\n", __func__, clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); return; } list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); nfsd4_dec_courtesy_client_count(nn, clp); clp->cl_state = NFSD4_ACTIVE; } static void put_client_renew_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (!atomic_dec_and_test(&clp->cl_rpc_users)) return; if (!is_client_expired(clp)) renew_client_locked(clp); else wake_up_all(&expiry_wq); } static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock)) return; if (!is_client_expired(clp)) renew_client_locked(clp); else wake_up_all(&expiry_wq); spin_unlock(&nn->client_lock); } static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { __be32 status; if (is_session_dead(ses)) return nfserr_badsession; status = get_client_locked(ses->se_client); if (status) return status; atomic_inc(&ses->se_ref); return nfs_ok; } static void nfsd4_put_session_locked(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) free_session(ses); put_client_renew_locked(clp); } static void nfsd4_put_session(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 
spin_lock(&nn->client_lock); nfsd4_put_session_locked(ses); spin_unlock(&nn->client_lock); } static struct nfsd4_blocked_lock * find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, struct nfsd_net *nn) { struct nfsd4_blocked_lock *cur, *found = NULL; spin_lock(&nn->blocked_locks_lock); list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; } } spin_unlock(&nn->blocked_locks_lock); if (found) locks_delete_block(&found->nbl_lock); return found; } static struct nfsd4_blocked_lock * find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, struct nfsd_net *nn) { struct nfsd4_blocked_lock *nbl; nbl = find_blocked_lock(lo, fh, nn); if (!nbl) { nbl = kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { INIT_LIST_HEAD(&nbl->nbl_list); INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); } } return nbl; } static void free_nbl(struct kref *kref) { struct nfsd4_blocked_lock *nbl; nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); locks_release_private(&nbl->nbl_lock); kfree(nbl); } static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); kref_put(&nbl->nbl_kref, free_nbl); } static void remove_blocked_locks(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_blocked_lock *nbl; LIST_HEAD(reaplist); /* Dequeue all blocked locks */ spin_lock(&nn->blocked_locks_lock); while (!list_empty(&lo->lo_blocked)) { nbl = list_first_entry(&lo->lo_blocked, struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); /* Now free them */ while (!list_empty(&reaplist)) { nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } } static void nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) { struct nfsd4_blocked_lock *nbl = container_of(cb, struct nfsd4_blocked_lock, nbl_cb); locks_delete_block(&nbl->nbl_lock); } static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { trace_nfsd_cb_notify_lock_done(&zero_stateid, task); /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and * just quit trying on anything else. */ switch (task->tk_status) { case -NFS4ERR_DELAY: rpc_delay(task, 1 * HZ); return 0; default: return 1; } } static void nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) { struct nfsd4_blocked_lock *nbl = container_of(cb, struct nfsd4_blocked_lock, nbl_cb); free_blocked_lock(nbl); } static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, .opcode = OP_CB_NOTIFY_LOCK, }; /* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what * combinations of share bits previous opens have used. 
This allows us * to enforce the recommendation in * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that * the server return an error if the client attempt to downgrade to a * combination of share bits not explicable by closing some of its * previous opens. * * This enforcement is arguably incomplete, since we don't keep * track of access/deny bit combinations; so, e.g., we allow: * * OPEN allow read, deny write * OPEN allow both, deny none * DOWNGRADE allow read, deny none * * which we should reject. * * But you could also argue that our current code is already overkill, * since it only exists to return NFS4ERR_INVAL on incorrect client * behavior. */ static unsigned int bmap_to_share_mode(unsigned long bmap) { int i; unsigned int access = 0; for (i = 1; i < 4; i++) { if (test_bit(i, &bmap)) access |= i; } return access; } /* set share access for a given stateid */ static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); stp->st_access_bmap |= mask; } /* clear share access for a given stateid */ static inline void clear_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); stp->st_access_bmap &= ~mask; } /* test whether a given stateid has access */ static inline bool test_access(u32 access, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << access; return (bool)(stp->st_access_bmap & mask); } /* set share deny for a given stateid */ static inline void set_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); stp->st_deny_bmap |= mask; } /* clear share deny for a given stateid */ static inline void clear_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); stp->st_deny_bmap &= ~mask; } /* test whether a given stateid is denying specific access */ static inline bool test_deny(u32 deny, struct nfs4_ol_stateid *stp) { unsigned char mask = 1 << deny; return (bool)(stp->st_deny_bmap & mask); } static int nfs4_access_to_omode(u32 access) { switch (access & NFS4_SHARE_ACCESS_BOTH) { case NFS4_SHARE_ACCESS_READ: return O_RDONLY; case NFS4_SHARE_ACCESS_WRITE: return O_WRONLY; case NFS4_SHARE_ACCESS_BOTH: return O_RDWR; } WARN_ON_ONCE(1); return O_RDONLY; } static inline int access_permit_read(struct nfs4_ol_stateid *stp) { return test_access(NFS4_SHARE_ACCESS_READ, stp) || test_access(NFS4_SHARE_ACCESS_BOTH, stp) || test_access(NFS4_SHARE_ACCESS_WRITE, stp); } static inline int access_permit_write(struct nfs4_ol_stateid *stp) { return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || test_access(NFS4_SHARE_ACCESS_BOTH, stp); } static inline struct nfs4_stateowner * nfs4_get_stateowner(struct nfs4_stateowner *sop) { atomic_inc(&sop->so_count); return sop; } static int same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) { return (sop->so_owner.len == owner->len) && 0 == memcmp(sop->so_owner.data, owner->data, owner->len); } static struct nfs4_openowner * find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_client *clp) { struct nfs4_stateowner *so; lockdep_assert_held(&clp->cl_lock); list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval], so_strhash) { if (!so->so_is_open_owner) continue; if (same_owner_str(so, &open->op_owner)) return openowner(nfs4_get_stateowner(so)); } return NULL; } static inline u32 opaque_hashval(const void 
*ptr, int nbytes) { unsigned char *cptr = (unsigned char *) ptr; u32 x = 0; while (nbytes--) { x *= 37; x += *cptr++; } return x; } void put_nfs4_file(struct nfs4_file *fi) { if (refcount_dec_and_test(&fi->fi_ref)) { nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); kfree_rcu(fi, fi_rcu); } } static struct nfsd_file * find_writeable_file_locked(struct nfs4_file *f) { struct nfsd_file *ret; lockdep_assert_held(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } static struct nfsd_file * find_writeable_file(struct nfs4_file *f) { struct nfsd_file *ret; spin_lock(&f->fi_lock); ret = find_writeable_file_locked(f); spin_unlock(&f->fi_lock); return ret; } static struct nfsd_file * find_readable_file_locked(struct nfs4_file *f) { struct nfsd_file *ret; lockdep_assert_held(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_RDONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } static struct nfsd_file * find_readable_file(struct nfs4_file *f) { struct nfsd_file *ret; spin_lock(&f->fi_lock); ret = find_readable_file_locked(f); spin_unlock(&f->fi_lock); return ret; } struct nfsd_file * find_any_file(struct nfs4_file *f) { struct nfsd_file *ret; if (!f) return NULL; spin_lock(&f->fi_lock); ret = nfsd_file_get(f->fi_fds[O_RDWR]); if (!ret) { ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) ret = nfsd_file_get(f->fi_fds[O_RDONLY]); } spin_unlock(&f->fi_lock); return ret; } static struct nfsd_file *find_any_file_locked(struct nfs4_file *f) { lockdep_assert_held(&f->fi_lock); if (f->fi_fds[O_RDWR]) return f->fi_fds[O_RDWR]; if (f->fi_fds[O_WRONLY]) return f->fi_fds[O_WRONLY]; if (f->fi_fds[O_RDONLY]) return f->fi_fds[O_RDONLY]; return NULL; } static atomic_long_t num_delegations; unsigned long max_delegations; /* * Open owner state (share locks) */ /* hash tables for lock and open owners */ #define OWNER_HASH_BITS 8 #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); return ret & OWNER_HASH_MASK; } static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; static const struct rhashtable_params nfs4_file_rhash_params = { .key_len = sizeof_field(struct nfs4_file, fi_inode), .key_offset = offsetof(struct nfs4_file, fi_inode), .head_offset = offsetof(struct nfs4_file, fi_rlist), /* * Start with a single page hash table to reduce resizing churn * on light workloads. */ .min_size = 256, .automatic_shrinking = true, }; /* * Check if courtesy clients have conflicting access and resolve it if possible * * access: is op_share_access if share_access is true. * Check if access mode, op_share_access, would conflict with * the current deny mode of the file 'fp'. * access: is op_share_deny if share_access is false. * Check if the deny mode, op_share_deny, would conflict with * current access of the file 'fp'. * stp: skip checking this entry. * new_stp: normal open, not open upgrade. * * Function returns: * false - access/deny mode conflict with normal client. * true - no conflict or conflict with courtesy client(s) is resolved. 
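 *
 * (Editorial sketch, restating the loop below: for every other stateid
 *  held on the file,
 *
 *	bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
 *	if (access & bmap_to_share_mode(bmap))
 *		// conflict, unless try_to_expire_client() can expire
 *		// the courtesy client that owns 'st'
 *
 *  and when every conflicting owner is an expirable courtesy client, the
 *  laundromat is kicked to reap them.)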
*/ static bool nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp, struct nfs4_ol_stateid *stp, u32 access, bool share_access) { struct nfs4_ol_stateid *st; bool resolvable = true; unsigned char bmap; struct nfsd_net *nn; struct nfs4_client *clp; lockdep_assert_held(&fp->fi_lock); list_for_each_entry(st, &fp->fi_stateids, st_perfile) { /* ignore lock stateid */ if (st->st_openstp) continue; if (st == stp && new_stp) continue; /* check file access against deny mode or vice versa */ bmap = share_access ? st->st_deny_bmap : st->st_access_bmap; if (!(access & bmap_to_share_mode(bmap))) continue; clp = st->st_stid.sc_client; if (try_to_expire_client(clp)) continue; resolvable = false; break; } if (resolvable) { clp = stp->st_stid.sc_client; nn = net_generic(clp->net, nfsd_net_id); mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); } return resolvable; } static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { lockdep_assert_held(&fp->fi_lock); if (access & NFS4_SHARE_ACCESS_WRITE) atomic_inc(&fp->fi_access[O_WRONLY]); if (access & NFS4_SHARE_ACCESS_READ) atomic_inc(&fp->fi_access[O_RDONLY]); } static __be32 nfs4_file_get_access(struct nfs4_file *fp, u32 access) { lockdep_assert_held(&fp->fi_lock); /* Does this access mode make sense? */ if (access & ~NFS4_SHARE_ACCESS_BOTH) return nfserr_inval; /* Does it conflict with a deny mode already set? */ if ((access & fp->fi_share_deny) != 0) return nfserr_share_denied; __nfs4_file_get_access(fp, access); return nfs_ok; } static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny) { /* Common case is that there is no deny mode. */ if (deny) { /* Does this deny mode make sense? */ if (deny & ~NFS4_SHARE_DENY_BOTH) return nfserr_inval; if ((deny & NFS4_SHARE_DENY_READ) && atomic_read(&fp->fi_access[O_RDONLY])) return nfserr_share_denied; if ((deny & NFS4_SHARE_DENY_WRITE) && atomic_read(&fp->fi_access[O_WRONLY])) return nfserr_share_denied; } return nfs_ok; } static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { might_lock(&fp->fi_lock); if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { struct nfsd_file *f1 = NULL; struct nfsd_file *f2 = NULL; swap(f1, fp->fi_fds[oflag]); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) nfsd_file_put(f1); if (f2) nfsd_file_put(f2); } } static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) { WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH); if (access & NFS4_SHARE_ACCESS_WRITE) __nfs4_file_put_access(fp, O_WRONLY); if (access & NFS4_SHARE_ACCESS_READ) __nfs4_file_put_access(fp, O_RDONLY); } /* * Allocate a new open/delegation state counter. This is needed for * pNFS for proper return on close semantics. * * Note that we only allocate it for pNFS-enabled exports, otherwise * all pointers to struct nfs4_clnt_odstate are always NULL. 
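 *
 * (Editorial note: alloc_clnt_odstate() below starts co_odcount at 1, and
 *  find_or_hash_clnt_odstate() either takes a reference on an existing
 *  per-client entry for the file or hashes the caller's preallocated one.)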
*/ static struct nfs4_clnt_odstate * alloc_clnt_odstate(struct nfs4_client *clp) { struct nfs4_clnt_odstate *co; co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); if (co) { co->co_client = clp; refcount_set(&co->co_odcount, 1); } return co; } static void hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co) { struct nfs4_file *fp = co->co_file; lockdep_assert_held(&fp->fi_lock); list_add(&co->co_perfile, &fp->fi_clnt_odstate); } static inline void get_clnt_odstate(struct nfs4_clnt_odstate *co) { if (co) refcount_inc(&co->co_odcount); } static void put_clnt_odstate(struct nfs4_clnt_odstate *co) { struct nfs4_file *fp; if (!co) return; fp = co->co_file; if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { list_del(&co->co_perfile); spin_unlock(&fp->fi_lock); nfsd4_return_all_file_layouts(co->co_client, fp); kmem_cache_free(odstate_slab, co); } } static struct nfs4_clnt_odstate * find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) { struct nfs4_clnt_odstate *co; struct nfs4_client *cl; if (!new) return NULL; cl = new->co_client; spin_lock(&fp->fi_lock); list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { if (co->co_client == cl) { get_clnt_odstate(co); goto out; } } co = new; co->co_file = fp; hash_clnt_odstate_locked(new); out: spin_unlock(&fp->fi_lock); return co; } struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; stid = kmem_cache_zalloc(slab, GFP_KERNEL); if (!stid) return NULL; idr_preload(GFP_KERNEL); spin_lock(&cl->cl_lock); /* Reserving 0 for start of file in nfsdfs "states" file: */ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT); spin_unlock(&cl->cl_lock); idr_preload_end(); if (new_id < 0) goto out_free; stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); return stid; out_free: kmem_cache_free(slab, stid); return NULL; } /* * Create a unique stateid_t to represent each COPY. 
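 *
 * (Editorial note: unlike ordinary client stateids, this one is synthesized
 *  from the server instance itself: cl_boot is the server boot time, cl_id
 *  is the per-net s2s_cp_cl_id, so_id is allocated from the s2s_cp_stateids
 *  IDR, and si_generation starts at 1.)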
*/ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, unsigned char cs_type) { int new_id; stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); stid->cs_stid.si_opaque.so_id = new_id; stid->cs_stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; stid->cs_type = cs_type; return 1; } int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) { return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID); } struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, struct nfs4_stid *p_stid) { struct nfs4_cpntf_state *cps; cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); refcount_set(&cps->cp_stateid.cs_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); list_add(&cps->cp_list, &p_stid->sc_cp_list); spin_unlock(&nn->s2s_cp_lock); return cps; out_free: kfree(cps); return NULL; } void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; if (copy->cp_stateid.cs_type != NFS4_COPY_STID) return; nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.cs_stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) { struct nfs4_cpntf_state *cps; struct nfsd_net *nn; nn = net_generic(net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); while (!list_empty(&stid->sc_cp_list)) { cps = list_first_entry(&stid->sc_cp_list, struct nfs4_cpntf_state, cp_list); _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); } static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; return openlockstateid(stid); } /* * As the sc_free callback of deleg, this may be called by nfs4_put_stid * in nfsd_break_one_deleg. * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, * this function mustn't ever sleep. */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } /* * When we recall a delegation, we should be careful not to hand it * out again straight away. * To ensure this we keep a pair of bloom filters ('new' and 'old') * in which the filehandles of recalled delegations are "stored". * If a filehandle appear in either filter, a delegation is blocked. * When a delegation is recalled, the filehandle is stored in the "new" * filter. * Every 30 seconds we swap the filters and clear the "new" one, * unless both are empty of course. This results in delegations for a * given filehandle being blocked for between 30 and 60 seconds. * * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. 
Testing does not need the lock * except when swapping the two filters. */ static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; time64_t swap_time; int new; /* index into 'set' */ DECLARE_BITMAP(set[2], 256); } blocked_delegations; static int delegation_blocked(struct knfsd_fh *fh) { u32 hash; struct bloom_pair *bd = &blocked_delegations; if (bd->entries == 0) return 0; if (ktime_get_seconds() - bd->swap_time > 30) { spin_lock(&blocked_delegations_lock); if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; bd->new = 1-bd->new; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); } hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) return 1; if (test_bit(hash&255, bd->set[1]) && test_bit((hash>>8)&255, bd->set[1]) && test_bit((hash>>16)&255, bd->set[1])) return 1; return 0; } static void block_delegations(struct knfsd_fh *fh) { u32 hash; struct bloom_pair *bd = &blocked_delegations; hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) bd->swap_time = ktime_get_seconds(); bd->entries += 1; spin_unlock(&blocked_delegations_lock); } static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate, u32 dl_type) { struct nfs4_delegation *dp; struct nfs4_stid *stid; long n; dprintk("NFSD alloc_init_deleg\n"); n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; stid = nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg); if (stid == NULL) goto out_dec; dp = delegstateid(stid); /* * delegation seqid's are never incremented. 
The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid * 0 anyway just for consistency and use 1: */ dp->dl_stid.sc_stateid.si_generation = 1; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_clnt_odstate = odstate; get_clnt_odstate(odstate); dp->dl_type = dl_type; dp->dl_retries = 1; dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); dp->dl_cb_fattr.ncf_file_modified = false; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; out_dec: atomic_long_dec(&num_delegations); return NULL; } void nfs4_put_stid(struct nfs4_stid *s) { struct nfs4_file *fp = s->sc_file; struct nfs4_client *clp = s->sc_client; might_lock(&clp->cl_lock); if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { wake_up_all(&close_wq); return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); if (s->sc_status & SC_STATUS_ADMIN_REVOKED) atomic_dec(&s->sc_client->cl_admin_revoked); nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); if (fp) put_nfs4_file(fp); } void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) { stateid_t *src = &stid->sc_stateid; spin_lock(&stid->sc_lock); if (unlikely(++src->si_generation == 0)) src->si_generation = 1; memcpy(dst, src, sizeof(*dst)); spin_unlock(&stid->sc_lock); } static void put_deleg_file(struct nfs4_file *fp) { struct nfsd_file *rnf = NULL; struct nfsd_file *nf = NULL; spin_lock(&fp->fi_lock); if (--fp->fi_delegees == 0) { swap(nf, fp->fi_deleg_file); swap(rnf, fp->fi_rdeleg_file); } spin_unlock(&fp->fi_lock); if (nf) nfsd_file_put(nf); if (rnf) { nfsd_file_put(rnf); nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ); } } static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f) { struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME | ATTR_DELEG }; struct inode *inode = file_inode(f); int ret; /* don't do anything if FMODE_NOCMTIME isn't set */ if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0) return; spin_lock(&f->f_lock); f->f_mode &= ~FMODE_NOCMTIME; spin_unlock(&f->f_lock); /* was it never written? */ if (!dp->dl_written) return; /* did it get a setattr for the timestamps at some point? 
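 * If so, dl_setattr is set and the "stamp everything to now" fallback
 * below must not overwrite the client's explicit timestamps.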
*/ if (dp->dl_setattr) return; /* Stamp everything to "now" */ inode_lock(inode); ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL); inode_unlock(inode); if (ret) { struct inode *inode = file_inode(f); pr_notice_ratelimited("nfsd: Unable to update timestamps on inode %02x:%02x:%lu: %d\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino, ret); } } static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; struct nfsd_file *nf = fp->fi_deleg_file; WARN_ON_ONCE(!fp->fi_delegees); nfsd4_finalize_deleg_timestamps(dp, nf->nf_file); kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp); put_deleg_file(fp); } static void destroy_unhashed_deleg(struct nfs4_delegation *dp) { put_clnt_odstate(dp->dl_clnt_odstate); nfs4_unlock_deleg_lease(dp); nfs4_put_stid(&dp->dl_stid); } /** * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: * On success: true iff an existing delegation is found */ static bool nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp) { struct nfs4_delegation *searchdp = NULL; struct nfs4_client *searchclp = NULL; lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { searchclp = searchdp->dl_stid.sc_client; if (clp == searchclp) { return true; } } return false; } /** * hash_delegation_locked - Add a delegation to the appropriate lists * @dp: a pointer to the nfs4_delegation we are adding. * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: * On success: NULL if the delegation was successfully hashed. * * On error: -EAGAIN if one was previously granted to this * nfs4_client for this nfs4_file. Delegation is not hashed. 
 *
 */
static int hash_delegation_locked(struct nfs4_delegation *dp,
				  struct nfs4_file *fp)
{
	struct nfs4_client *clp = dp->dl_stid.sc_client;

	lockdep_assert_held(&state_lock);
	lockdep_assert_held(&fp->fi_lock);
	lockdep_assert_held(&clp->cl_lock);

	if (nfs4_delegation_exists(clp, fp))
		return -EAGAIN;
	refcount_inc(&dp->dl_stid.sc_count);
	dp->dl_stid.sc_type = SC_TYPE_DELEG;
	list_add(&dp->dl_perfile, &fp->fi_delegations);
	list_add(&dp->dl_perclnt, &clp->cl_delegations);
	return 0;
}

static bool delegation_hashed(struct nfs4_delegation *dp)
{
	return !(list_empty(&dp->dl_perfile));
}

static bool unhash_delegation_locked(struct nfs4_delegation *dp,
		unsigned short statusmask)
{
	struct nfs4_file *fp = dp->dl_stid.sc_file;

	lockdep_assert_held(&state_lock);

	if (!delegation_hashed(dp))
		return false;

	if (statusmask == SC_STATUS_REVOKED &&
	    dp->dl_stid.sc_client->cl_minorversion == 0)
		statusmask = SC_STATUS_CLOSED;
	dp->dl_stid.sc_status |= statusmask;
	if (statusmask & SC_STATUS_ADMIN_REVOKED)
		atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked);

	/* Ensure that deleg break won't try to requeue it */
	++dp->dl_time;
	spin_lock(&fp->fi_lock);
	list_del_init(&dp->dl_perclnt);
	list_del_init(&dp->dl_recall_lru);
	list_del_init(&dp->dl_perfile);
	spin_unlock(&fp->fi_lock);
	return true;
}

static void destroy_delegation(struct nfs4_delegation *dp)
{
	bool unhashed;

	spin_lock(&state_lock);
	unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
	spin_unlock(&state_lock);
	if (unhashed)
		destroy_unhashed_deleg(dp);
}

/**
 * revoke_delegation - perform nfs4 delegation structure cleanup
 * @dp: pointer to the delegation
 *
 * This function assumes that it's called either from the administrative
 * interface (nfsd4_revoke_states()) that's revoking a specific delegation
 * stateid or it's called from a laundromat thread (nfsd4_laundromat()) that
 * determined that this specific state has expired and needs to be revoked
 * (both mark state with the appropriate stid sc_status mode). It is also
 * assumed that a reference was taken on the @dp state.
 *
 * If this function finds that the @dp state is SC_STATUS_FREED it means
 * that a FREE_STATEID operation for this stateid has been processed and
 * we can proceed to removing it from the recalled list. However, if @dp state
 * isn't marked SC_STATUS_FREED, it means we need to place it on the cl_revoked
 * list and wait for the FREE_STATEID to arrive from the client. At the same
 * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the
 * nfsd4_free_stateid() function that this stateid has already been added
 * to the cl_revoked list and that nfsd4_free_stateid() is now responsible
 * for removing it from the list. Inspection of where the delegation state
 * is in the revocation process is protected by the clp->cl_lock.
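 *
 * (Editorial sketch of the hand-off described above:
 *
 *	revoke_delegation(dp)
 *	    sc_status & SC_STATUS_FREED?
 *	      yes: FREE_STATEID already ran -> drop dp from dl_recall_lru
 *	      no:  add dp to clp->cl_revoked and set SC_STATUS_FREEABLE;
 *	           a later FREE_STATEID reaches nfsd4_free_stateid(),
 *	           which then removes dp from cl_revoked.)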
*/ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && !(dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); spin_lock(&clp->cl_lock); if (dp->dl_stid.sc_status & SC_STATUS_FREED) { list_del_init(&dp->dl_recall_lru); goto out; } list_add(&dp->dl_recall_lru, &clp->cl_revoked); dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; out: spin_unlock(&clp->cl_lock); destroy_unhashed_deleg(dp); } /* * SETCLIENTID state */ static unsigned int clientid_hashval(u32 id) { return id & CLIENT_HASH_MASK; } static unsigned int clientstr_hashval(struct xdr_netobj name) { return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK; } /* * A stateid that had a deny mode associated with it is being released * or downgraded. Recalculate the deny mode on the file. */ static void recalculate_deny_mode(struct nfs4_file *fp) { struct nfs4_ol_stateid *stp; u32 old_deny; spin_lock(&fp->fi_lock); old_deny = fp->fi_share_deny; fp->fi_share_deny = 0; list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); if (fp->fi_share_deny == old_deny) break; } spin_unlock(&fp->fi_lock); } static void reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) { int i; bool change = false; for (i = 1; i < 4; i++) { if ((i & deny) != i) { change = true; clear_deny(i, stp); } } /* Recalculate per-file deny mode if there was a change */ if (change) recalculate_deny_mode(stp->st_stid.sc_file); } /* release all access and file references for a given stateid */ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; struct nfs4_file *fp = stp->st_stid.sc_file; if (fp && stp->st_deny_bmap != 0) recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) nfs4_file_put_access(stp->st_stid.sc_file, i); clear_access(i, stp); } } static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop) { kfree(sop->so_owner.data); sop->so_ops->so_free(sop); } static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { struct nfs4_client *clp = sop->so_client; might_lock(&clp->cl_lock); if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock)) return; sop->so_ops->so_unhash(sop); spin_unlock(&clp->cl_lock); nfs4_free_stateowner(sop); } static bool nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp) { return list_empty(&stp->st_perfile); } static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); if (list_empty(&stp->st_perfile)) return false; spin_lock(&fp->fi_lock); list_del_init(&stp->st_perfile); spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); return true; } static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); put_clnt_odstate(stp->st_clnt_odstate); release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); if (!list_empty(&stid->sc_cp_list)) nfs4_free_cpntf_statelist(stid->sc_client->net, stid); kmem_cache_free(stateid_slab, stid); } static void nfs4_free_lock_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); struct nfsd_file *nf; nf = find_any_file(stp->st_stid.sc_file); if (nf) { get_file(nf->nf_file); filp_close(nf->nf_file, 
(fl_owner_t)lo); nfsd_file_put(nf); } nfs4_free_ol_stateid(stid); } /* * Put the persistent reference to an already unhashed generic stateid, while * holding the cl_lock. If it's the last reference, then put it onto the * reaplist for later destruction. */ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { struct nfs4_stid *s = &stp->st_stid; struct nfs4_client *clp = s->sc_client; lockdep_assert_held(&clp->cl_lock); WARN_ON_ONCE(!list_empty(&stp->st_locks)); if (!refcount_dec_and_test(&s->sc_count)) { wake_up_all(&close_wq); return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); if (s->sc_status & SC_STATUS_ADMIN_REVOKED) atomic_dec(&s->sc_client->cl_admin_revoked); list_add(&stp->st_locks, reaplist); } static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); if (!unhash_ol_stateid(stp)) return false; list_del_init(&stp->st_locks); stp->st_stid.sc_status |= SC_STATUS_CLOSED; return true; } static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_client *clp = stp->st_stid.sc_client; bool unhashed; spin_lock(&clp->cl_lock); unhashed = unhash_lock_stateid(stp); spin_unlock(&clp->cl_lock); if (unhashed) nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; lockdep_assert_held(&clp->cl_lock); list_del_init(&lo->lo_owner.so_strhash); } /* * Free a list of generic stateids that were collected earlier after being * fully unhashed. */ static void free_ol_stateid_reaplist(struct list_head *reaplist) { struct nfs4_ol_stateid *stp; struct nfs4_file *fp; might_sleep(); while (!list_empty(reaplist)) { stp = list_first_entry(reaplist, struct nfs4_ol_stateid, st_locks); list_del(&stp->st_locks); fp = stp->st_stid.sc_file; stp->st_stid.sc_free(&stp->st_stid); if (fp) put_nfs4_file(fp); } } static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, struct list_head *reaplist) { struct nfs4_ol_stateid *stp; lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock); while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); unhash_lock_stateid(stp); put_ol_stateid_locked(stp, reaplist); } } static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); if (!unhash_ol_stateid(stp)) return false; release_open_stateid_locks(stp, reaplist); return true; } static void release_open_stateid(struct nfs4_ol_stateid *stp) { LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); stp->st_stid.sc_status |= SC_STATUS_CLOSED; if (unhash_open_stateid(stp, &reaplist)) put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); } static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo) { lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); return list_empty(&oo->oo_owner.so_strhash) && list_empty(&oo->oo_perclient); } static void unhash_openowner_locked(struct nfs4_openowner *oo) { struct nfs4_client *clp = oo->oo_owner.so_client; lockdep_assert_held(&clp->cl_lock); list_del_init(&oo->oo_owner.so_strhash); list_del_init(&oo->oo_perclient); } static void release_last_closed_stateid(struct nfs4_openowner *oo) { struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, nfsd_net_id); struct nfs4_ol_stateid *s; spin_lock(&nn->client_lock); s = 
oo->oo_last_closed_stid;
	if (s) {
		list_del_init(&oo->oo_close_lru);
		oo->oo_last_closed_stid = NULL;
	}
	spin_unlock(&nn->client_lock);
	if (s)
		nfs4_put_stid(&s->st_stid);
}

static void release_openowner(struct nfs4_openowner *oo)
{
	struct nfs4_ol_stateid *stp;
	struct nfs4_client *clp = oo->oo_owner.so_client;
	LIST_HEAD(reaplist);

	spin_lock(&clp->cl_lock);
	unhash_openowner_locked(oo);
	while (!list_empty(&oo->oo_owner.so_stateids)) {
		stp = list_first_entry(&oo->oo_owner.so_stateids,
				struct nfs4_ol_stateid, st_perstateowner);
		if (unhash_open_stateid(stp, &reaplist))
			put_ol_stateid_locked(stp, &reaplist);
	}
	spin_unlock(&clp->cl_lock);
	free_ol_stateid_reaplist(&reaplist);
	release_last_closed_stateid(oo);
	nfs4_put_stateowner(&oo->oo_owner);
}

static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
					  struct super_block *sb,
					  unsigned int sc_types)
{
	unsigned long id, tmp;
	struct nfs4_stid *stid;

	spin_lock(&clp->cl_lock);
	idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
		if ((stid->sc_type & sc_types) &&
		    stid->sc_status == 0 &&
		    stid->sc_file->fi_inode->i_sb == sb) {
			refcount_inc(&stid->sc_count);
			break;
		}
	spin_unlock(&clp->cl_lock);
	return stid;
}

/**
 * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
 * @nn: used to identify instance of nfsd (there is one per net namespace)
 * @sb: super_block used to identify target filesystem
 *
 * All nfs4 states (open, lock, delegation, layout) held by the server instance
 * and associated with a file on the given filesystem will be revoked resulting
 * in any files being closed and so all references from nfsd to the filesystem
 * being released.  Thus nfsd will no longer prevent the filesystem from being
 * unmounted.
 *
 * The clients which own the states will subsequently be notified that the
 * states have been "admin-revoked".
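 *
 * (Editorial note: the loop below drops client_lock each time it revokes a
 *  state and then restarts the scan of the current hash chain ("goto retry"),
 *  since the chain may have changed while the lock was released.)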
*/ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) { unsigned int idhashval; unsigned int sc_types; sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT; spin_lock(&nn->client_lock); for (idhashval = 0; idhashval < CLIENT_HASH_SIZE; idhashval++) { struct list_head *head = &nn->conf_id_hashtbl[idhashval]; struct nfs4_client *clp; retry: list_for_each_entry(clp, head, cl_idhash) { struct nfs4_stid *stid = find_one_sb_stid(clp, sb, sc_types); if (stid) { struct nfs4_ol_stateid *stp; struct nfs4_delegation *dp; struct nfs4_layout_stateid *ls; spin_unlock(&nn->client_lock); switch (stid->sc_type) { case SC_TYPE_OPEN: stp = openlockstateid(stid); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); spin_lock(&clp->cl_lock); if (stid->sc_status == 0) { stid->sc_status |= SC_STATUS_ADMIN_REVOKED; atomic_inc(&clp->cl_admin_revoked); spin_unlock(&clp->cl_lock); release_all_access(stp); } else spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); break; case SC_TYPE_LOCK: stp = openlockstateid(stid); mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); spin_lock(&clp->cl_lock); if (stid->sc_status == 0) { struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); struct nfsd_file *nf; stid->sc_status |= SC_STATUS_ADMIN_REVOKED; atomic_inc(&clp->cl_admin_revoked); spin_unlock(&clp->cl_lock); nf = find_any_file(stp->st_stid.sc_file); if (nf) { get_file(nf->nf_file); filp_close(nf->nf_file, (fl_owner_t)lo); nfsd_file_put(nf); } release_all_access(stp); } else spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); break; case SC_TYPE_DELEG: refcount_inc(&stid->sc_count); dp = delegstateid(stid); spin_lock(&state_lock); if (!unhash_delegation_locked( dp, SC_STATUS_ADMIN_REVOKED)) dp = NULL; spin_unlock(&state_lock); if (dp) revoke_delegation(dp); break; case SC_TYPE_LAYOUT: ls = layoutstateid(stid); nfsd4_close_layout(ls); break; } nfs4_put_stid(stid); spin_lock(&nn->client_lock); if (clp->cl_minorversion == 0) /* Allow cleanup after a lease period. * store_release ensures cleanup will * see any newly revoked states if it * sees the time updated. */ nn->nfs40_last_revoke = ktime_get_boottime_seconds(); goto retry; } } } spin_unlock(&nn->client_lock); } static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; return sid->sequence % SESSION_HASH_SIZE; } #ifdef CONFIG_SUNRPC_DEBUG static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { u32 *ptr = (u32 *)(&sessionid->data[0]); dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); } #else static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { } #endif /* * Bump the seqid on cstate->replay_owner, and clear replay_owner if it * won't be used for replay. 
 */
void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
{
	struct nfs4_stateowner *so = cstate->replay_owner;

	if (nfserr == nfserr_replay_me)
		return;

	if (!seqid_mutating_err(ntohl(nfserr))) {
		nfsd4_cstate_clear_replay(cstate);
		return;
	}
	if (!so)
		return;
	if (so->so_is_open_owner)
		release_last_closed_stateid(openowner(so));
	so->so_seqid++;
	return;
}

static void gen_sessionid(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd4_sessionid *sid;

	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
	sid->clientid = clp->cl_clientid;
	sid->sequence = current_sessionid++;
	sid->reserved = 0;
}

/*
 * The protocol defines ca_maxresponsesize_cached to include the size of
 * the rpc header, but all we need to cache is the data starting after
 * the end of the initial SEQUENCE operation--the rest we regenerate
 * each time.  Therefore we can advertise a ca_maxresponsesize_cached
 * value that is the number of bytes in our cache plus a few additional
 * bytes.  In order to stay on the safe side, and not promise more than
 * we can cache, those additional bytes must be the minimum possible: 24
 * bytes of rpc header (xid through accept state, with AUTH_NULL
 * verifier), 12 for the compound header (with zero-length tag), and 44
 * for the SEQUENCE op response:
 */
#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)

static struct shrinker *nfsd_slot_shrinker;
static DEFINE_SPINLOCK(nfsd_session_list_lock);
static LIST_HEAD(nfsd_session_list);
/* The sum of "target_slots-1" on every session.  The shrinker can push this
 * down, though it can take a little while for the memory to actually
 * be freed.  The "-1" is because we can never free slot 0 while the
 * session is active.
 */
static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);

static void free_session_slots(struct nfsd4_session *ses, int from)
{
	int i;

	if (from >= ses->se_fchannel.maxreqs)
		return;

	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);

		/*
		 * Save the seqid in case we reactivate this slot.
		 * This will never require a memory allocation so GFP
		 * flag is irrelevant
		 */
		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
		free_svc_cred(&slot->sl_cred);
		kfree(slot);
	}
	ses->se_fchannel.maxreqs = from;
	if (ses->se_target_maxslots > from) {
		int new_target = from ?: 1;
		atomic_sub(ses->se_target_maxslots - new_target,
			   &nfsd_total_target_slots);
		ses->se_target_maxslots = new_target;
	}
}

/**
 * reduce_session_slots - reduce the target max-slots of a session if possible
 * @ses: The session to affect
 * @dec: how much to decrease the target by
 *
 * This interface can be used by a shrinker to reduce the target max-slots
 * for a session so that some slots can eventually be freed.
 * It uses spin_trylock() as it may be called in a context where another
 * spinlock is held that has a dependency on client_lock.  As shrinkers are
 * best-effort, skipping a session if client_lock is already held has no
 * great cost.
 *
 * Return value:
 *   The number of slots that the target was reduced by.
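 *
 * (Editorial note: se_slot_gen is bumped on every reduction; on the rare
 *  wrap to zero, the code below resets every slot's sl_generation so that
 *  a stale slot generation can never alias a current one.)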
*/ static int reduce_session_slots(struct nfsd4_session *ses, int dec) { struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); int ret = 0; if (ses->se_target_maxslots <= 1) return ret; if (!spin_trylock(&nn->client_lock)) return ret; ret = min(dec, ses->se_target_maxslots-1); ses->se_target_maxslots -= ret; atomic_sub(ret, &nfsd_total_target_slots); ses->se_slot_gen += 1; if (ses->se_slot_gen == 0) { int i; ses->se_slot_gen = 1; for (i = 0; i < ses->se_fchannel.maxreqs; i++) { struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); slot->sl_generation = 0; } } spin_unlock(&nn->client_lock); return ret; } static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs, int index, gfp_t gfp) { struct nfsd4_slot *slot; size_t size; /* * The RPC and NFS session headers are never saved in * the slot reply cache buffer. */ size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ? 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; slot = kzalloc(struct_size(slot, sl_data, size), gfp); if (!slot) return NULL; slot->sl_index = index; return slot; } static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; struct nfsd4_session *new; struct nfsd4_slot *slot; int i; new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; xa_init(&new->se_slots); slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL); if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) goto out_free; for (i = 1; i < numslots; i++) { const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; slot = nfsd4_alloc_slot(fattrs, i, gfp); if (!slot) break; if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { kfree(slot); break; } } fattrs->maxreqs = i; memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); new->se_target_maxslots = i; atomic_add(i - 1, &nfsd_total_target_slots); new->se_cb_slot_avail = ~0U; new->se_cb_highest_slot = min(battrs->maxreqs - 1, NFSD_BC_SLOT_TABLE_SIZE - 1); spin_lock_init(&new->se_lock); return new; out_free: kfree(slot); xa_destroy(&new->se_slots); kfree(new); return NULL; } static void free_conn(struct nfsd4_conn *c) { svc_xprt_put(c->cn_xprt); kfree(c); } static void nfsd4_conn_lost(struct svc_xpt_user *u) { struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); struct nfs4_client *clp = c->cn_session->se_client; trace_nfsd_cb_lost(clp); spin_lock(&clp->cl_lock); if (!list_empty(&c->cn_persession)) { list_del(&c->cn_persession); free_conn(c); } nfsd4_probe_callback(clp); spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) { struct nfsd4_conn *conn; conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); if (!conn) return NULL; svc_xprt_get(rqstp->rq_xprt); conn->cn_xprt = rqstp->rq_xprt; conn->cn_flags = flags; INIT_LIST_HEAD(&conn->cn_xpt_user.list); return conn; } static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { conn->cn_session = ses; list_add(&conn->cn_persession, &ses->se_conns); } static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; spin_lock(&clp->cl_lock); __nfsd4_hash_conn(conn, ses); spin_unlock(&clp->cl_lock); } static int nfsd4_register_conn(struct nfsd4_conn *conn) { conn->cn_xpt_user.callback = nfsd4_conn_lost; return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); } static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses) { 
static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn,
			    struct nfsd4_session *ses)
{
	int ret;

	nfsd4_hash_conn(conn, ses);
	ret = nfsd4_register_conn(conn);
	if (ret)
		/* oops; xprt is already down: */
		nfsd4_conn_lost(&conn->cn_xpt_user);
	/* We may have gained or lost a callback channel: */
	nfsd4_probe_callback_sync(ses->se_client);
}

static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp,
						struct nfsd4_create_session *cses)
{
	u32 dir = NFS4_CDFC4_FORE;

	if (cses->flags & SESSION4_BACK_CHAN)
		dir |= NFS4_CDFC4_BACK;
	return alloc_conn(rqstp, dir);
}

/* must be called under client_lock */
static void nfsd4_del_conns(struct nfsd4_session *s)
{
	struct nfs4_client *clp = s->se_client;
	struct nfsd4_conn *c;

	spin_lock(&clp->cl_lock);
	while (!list_empty(&s->se_conns)) {
		c = list_first_entry(&s->se_conns, struct nfsd4_conn,
				     cn_persession);
		list_del_init(&c->cn_persession);
		spin_unlock(&clp->cl_lock);

		unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
		free_conn(c);

		spin_lock(&clp->cl_lock);
	}
	spin_unlock(&clp->cl_lock);
}

static void __free_session(struct nfsd4_session *ses)
{
	free_session_slots(ses, 0);
	xa_destroy(&ses->se_slots);
	kfree(ses);
}

static void free_session(struct nfsd4_session *ses)
{
	nfsd4_del_conns(ses);
	__free_session(ses);
}

static unsigned long
nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
{
	unsigned long cnt = atomic_read(&nfsd_total_target_slots);

	return cnt ? cnt : SHRINK_EMPTY;
}

static unsigned long
nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
{
	struct nfsd4_session *ses;
	unsigned long scanned = 0;
	unsigned long freed = 0;

	spin_lock(&nfsd_session_list_lock);
	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
		freed += reduce_session_slots(ses, 1);
		scanned += 1;
		if (scanned >= sc->nr_to_scan) {
			/* Move starting point for next scan */
			list_move(&nfsd_session_list, &ses->se_all_sessions);
			break;
		}
	}
	spin_unlock(&nfsd_session_list_lock);
	sc->nr_scanned = scanned;
	return freed;
}
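/*
 * The two functions above implement a memory shrinker for session
 * slot-table memory: ->count_objects reports the total number of
 * target slots, and ->scan_objects walks the global session list,
 * asking each session to give up one slot.  A minimal sketch of how
 * such a shrinker could be wired up (registration happens elsewhere;
 * the name string here is only illustrative):
 *
 *	struct shrinker *s = shrinker_alloc(0, "nfsd-session-slots");
 *
 *	if (s) {
 *		s->count_objects = nfsd_slot_count;
 *		s->scan_objects = nfsd_slot_scan;
 *		shrinker_register(s);
 *	}
 */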
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new,
			 struct nfs4_client *clp,
			 struct nfsd4_create_session *cses)
{
	int idx;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	new->se_client = clp;
	gen_sessionid(new);

	INIT_LIST_HEAD(&new->se_conns);

	atomic_set(&new->se_ref, 0);
	new->se_dead = false;
	new->se_cb_prog = cses->callback_prog;
	new->se_cb_sec = cses->cb_sec;

	for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
		new->se_cb_seq_nr[idx] = 1;

	idx = hash_sessionid(&new->se_sessionid);
	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
	spin_lock(&clp->cl_lock);
	list_add(&new->se_perclnt, &clp->cl_sessions);
	spin_unlock(&clp->cl_lock);

	spin_lock(&nfsd_session_list_lock);
	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
	spin_unlock(&nfsd_session_list_lock);

	{
		struct sockaddr *sa = svc_addr(rqstp);
		/*
		 * This is a little silly; with sessions there's no real
		 * use for the callback address.  Use the peer address
		 * as a reasonable default for now, but consider fixing
		 * the rpc client not to require an address in the
		 * future:
		 */
		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
	}
}

/* caller must hold client_lock */
static struct nfsd4_session *
__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
{
	struct nfsd4_session *elem;
	int idx;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	dump_sessionid(__func__, sessionid);
	idx = hash_sessionid(sessionid);
	/* Search in the appropriate list */
	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
		if (!memcmp(elem->se_sessionid.data, sessionid->data,
			    NFS4_MAX_SESSIONID_LEN)) {
			return elem;
		}
	}

	dprintk("%s: session not found\n", __func__);
	return NULL;
}

static struct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
			  __be32 *ret)
{
	struct nfsd4_session *session;
	__be32 status = nfserr_badsession;

	session = __find_in_sessionid_hashtbl(sessionid, net);
	if (!session)
		goto out;
	status = nfsd4_get_session_locked(session);
	if (status)
		session = NULL;
out:
	*ret = status;
	return session;
}

/* caller must hold client_lock */
static void
unhash_session(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	list_del(&ses->se_hash);
	spin_lock(&ses->se_client->cl_lock);
	list_del(&ses->se_perclnt);
	spin_unlock(&ses->se_client->cl_lock);
	spin_lock(&nfsd_session_list_lock);
	list_del(&ses->se_all_sessions);
	spin_unlock(&nfsd_session_list_lock);
}

/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
static int
STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
{
	/*
	 * We're assuming the clid was not given out from a boot
	 * precisely 2^32 (about 136 years) before this one.
	 * That seems a safe assumption:
	 */
	if (clid->cl_boot == (u32)nn->boot_time)
		return 0;
	trace_nfsd_clid_stale(clid);
	return 1;
}

static struct nfs4_client *alloc_client(struct xdr_netobj name,
					struct nfsd_net *nn)
{
	struct nfs4_client *clp;
	int i;

	if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients &&
	    atomic_read(&nn->nfsd_courtesy_clients) > 0)
		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);

	clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
	if (clp == NULL)
		return NULL;
	xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
	if (clp->cl_name.data == NULL)
		goto err_no_name;
	clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
						 sizeof(struct list_head),
						 GFP_KERNEL);
	if (!clp->cl_ownerstr_hashtbl)
		goto err_no_hashtbl;
	clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
	if (!clp->cl_callback_wq)
		goto err_no_callback_wq;

	for (i = 0; i < OWNER_HASH_SIZE; i++)
		INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
	INIT_LIST_HEAD(&clp->cl_sessions);
	idr_init(&clp->cl_stateids);
	atomic_set(&clp->cl_rpc_users, 0);
	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
	clp->cl_state = NFSD4_ACTIVE;
	atomic_inc(&nn->nfs4_client_count);
	atomic_set(&clp->cl_delegs_in_recall, 0);
	INIT_LIST_HEAD(&clp->cl_idhash);
	INIT_LIST_HEAD(&clp->cl_openowners);
	INIT_LIST_HEAD(&clp->cl_delegations);
	INIT_LIST_HEAD(&clp->cl_lru);
	INIT_LIST_HEAD(&clp->cl_revoked);
#ifdef CONFIG_NFSD_PNFS
	INIT_LIST_HEAD(&clp->cl_lo_states);
#endif
	INIT_LIST_HEAD(&clp->async_copies);
	spin_lock_init(&clp->async_lock);
	spin_lock_init(&clp->cl_lock);
	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
	return clp;
err_no_callback_wq:
	kfree(clp->cl_ownerstr_hashtbl);
err_no_hashtbl:
	kfree(clp->cl_name.data);
err_no_name:
	kmem_cache_free(client_slab, clp);
	return NULL;
}

static void __free_client(struct kref *k)
{
	struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
	struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);

	free_svc_cred(&clp->cl_cred);
	destroy_workqueue(clp->cl_callback_wq);
	kfree(clp->cl_ownerstr_hashtbl);
	kfree(clp->cl_name.data);
	kfree(clp->cl_nii_domain.data);
	kfree(clp->cl_nii_name.data);
	idr_destroy(&clp->cl_stateids);
	kfree(clp->cl_ra);
	kmem_cache_free(client_slab, clp);
}

static void drop_client(struct nfs4_client *clp)
{
	kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
}

static void free_client(struct nfs4_client *clp)
{
	while (!list_empty(&clp->cl_sessions)) {
		struct nfsd4_session *ses;

		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
				 se_perclnt);
		list_del(&ses->se_perclnt);
		WARN_ON_ONCE(atomic_read(&ses->se_ref));
		free_session(ses);
	}
	rpc_destroy_wait_queue(&clp->cl_cb_waitq);
	if (clp->cl_nfsd_dentry) {
		nfsd_client_rmdir(clp->cl_nfsd_dentry);
		clp->cl_nfsd_dentry = NULL;
		wake_up_all(&expiry_wq);
	}
	drop_client(clp);
}
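/*
 * Client lifetime note: the client's memory is pinned by the kref in
 * clp->cl_nfsdfs, which open nfsdfs files also hold.  free_client()
 * tears down the sessions and the nfsdfs directory and then calls
 * drop_client(); only the final drop_client() (possibly from a late
 * nfsdfs file release) runs __free_client() above to free the rest.
 */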
/* must be called under the client_lock */
static void
unhash_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct nfsd4_session *ses;

	lockdep_assert_held(&nn->client_lock);

	/* Mark the client as expired! */
	clp->cl_time = 0;
	/* Make it invisible */
	if (!list_empty(&clp->cl_idhash)) {
		list_del_init(&clp->cl_idhash);
		if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
			rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
		else
			rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
	}
	list_del_init(&clp->cl_lru);
	spin_lock(&clp->cl_lock);
	spin_lock(&nfsd_session_list_lock);
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
		list_del_init(&ses->se_hash);
		list_del_init(&ses->se_all_sessions);
	}
	spin_unlock(&nfsd_session_list_lock);
	spin_unlock(&clp->cl_lock);
}

static void
unhash_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	unhash_client_locked(clp);
	spin_unlock(&nn->client_lock);
}

static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
	int users = atomic_read(&clp->cl_rpc_users);

	trace_nfsd_mark_client_expired(clp, users);

	if (users)
		return nfserr_jukebox;
	unhash_client_locked(clp);
	return nfs_ok;
}

static void
__destroy_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	int i;
	struct nfs4_openowner *oo;
	struct nfs4_delegation *dp;
	LIST_HEAD(reaplist);

	spin_lock(&state_lock);
	while (!list_empty(&clp->cl_delegations)) {
		dp = list_entry(clp->cl_delegations.next,
				struct nfs4_delegation, dl_perclnt);
		unhash_delegation_locked(dp, SC_STATUS_CLOSED);
		list_add(&dp->dl_recall_lru, &reaplist);
	}
	spin_unlock(&state_lock);
	while (!list_empty(&reaplist)) {
		dp = list_entry(reaplist.next, struct nfs4_delegation,
				dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		destroy_unhashed_deleg(dp);
	}
	while (!list_empty(&clp->cl_revoked)) {
		dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation,
				dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		nfs4_put_stid(&dp->dl_stid);
	}
	while (!list_empty(&clp->cl_openowners)) {
		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner,
				oo_perclient);
		nfs4_get_stateowner(&oo->oo_owner);
		release_openowner(oo);
	}
	for (i = 0; i < OWNER_HASH_SIZE; i++) {
		struct nfs4_stateowner *so, *tmp;

		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
					 so_strhash) {
			/* Should be no openowners at this point */
			WARN_ON_ONCE(so->so_is_open_owner);
			remove_blocked_locks(lockowner(so));
		}
	}
	nfsd4_return_all_client_layouts(clp);
	nfsd4_shutdown_copy(clp);
	nfsd4_shutdown_callback(clp);
	if (clp->cl_cb_conn.cb_xprt)
		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
	atomic_add_unless(&nn->nfs4_client_count, -1, 0);
	nfsd4_dec_courtesy_client_count(nn, clp);
	free_client(clp);
	wake_up_all(&expiry_wq);
}

static void
destroy_client(struct nfs4_client *clp)
{
	unhash_client(clp);
	__destroy_client(clp);
}

static void inc_reclaim_complete(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (!nn->track_reclaim_completes)
		return;
	if (!nfsd4_find_reclaim_client(clp->cl_name, nn))
		return;
	if (atomic_inc_return(&nn->nr_reclaim_complete) ==
			nn->reclaim_str_hashtbl_size) {
		printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
				clp->net->ns.inum);
		nfsd4_end_grace(nn);
	}
}

static void expire_client(struct nfs4_client *clp)
{
	unhash_client(clp);
	nfsd4_client_record_remove(clp);
	__destroy_client(clp);
}

static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
	memcpy(target->cl_verifier.data, source->data,
			sizeof(target->cl_verifier.data));
}
static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
{
	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
	target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}

static int copy_cred(struct svc_cred *target, struct svc_cred *source)
{
	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
								GFP_KERNEL);
	target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
	if ((source->cr_principal && !target->cr_principal) ||
	    (source->cr_raw_principal && !target->cr_raw_principal) ||
	    (source->cr_targ_princ && !target->cr_targ_princ))
		return -ENOMEM;

	target->cr_flavor = source->cr_flavor;
	target->cr_uid = source->cr_uid;
	target->cr_gid = source->cr_gid;
	target->cr_group_info = source->cr_group_info;
	get_group_info(target->cr_group_info);
	target->cr_gss_mech = source->cr_gss_mech;
	if (source->cr_gss_mech)
		gss_mech_get(source->cr_gss_mech);
	return 0;
}

static int
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
	if (o1->len < o2->len)
		return -1;
	if (o1->len > o2->len)
		return 1;
	return memcmp(o1->data, o2->data, o1->len);
}

static int same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
}

static int same_clid(clientid_t *cl1, clientid_t *cl2)
{
	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
}

static bool groups_equal(struct group_info *g1, struct group_info *g2)
{
	int i;

	if (g1->ngroups != g2->ngroups)
		return false;
	for (i = 0; i < g1->ngroups; i++)
		if (!gid_eq(g1->gid[i], g2->gid[i]))
			return false;
	return true;
}

/*
 * RFC 3530 language requires clid_inuse be returned when the
 * "principal" associated with a request differs from that previously
 * used.  We use uid, gid's, and gss principal string as our best
 * approximation.  We also don't want to allow non-gss use of a client
 * established using gss: in theory cr_principal should catch that
 * change, but in practice cr_principal can be null even in the gss case
 * since gssd doesn't always pass down a principal string.
 */
static bool is_gss_cred(struct svc_cred *cr)
{
	/* Is cr_flavor one of the gss "pseudoflavors"?: */
	return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
}

static bool
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
	if ((is_gss_cred(cr1) != is_gss_cred(cr2))
		|| (!uid_eq(cr1->cr_uid, cr2->cr_uid))
		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
		return false;
	/* XXX: check that cr_targ_princ fields match ? */
	if (cr1->cr_principal == cr2->cr_principal)
		return true;
	if (!cr1->cr_principal || !cr2->cr_principal)
		return false;
	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
}

static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
{
	struct svc_cred *cr = &rqstp->rq_cred;
	u32 service;

	if (!cr->cr_gss_mech)
		return false;
	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
	return service == RPC_GSS_SVC_INTEGRITY ||
	       service == RPC_GSS_SVC_PRIVACY;
}

bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
{
	struct svc_cred *cr = &rqstp->rq_cred;

	if (!cl->cl_mach_cred)
		return true;
	if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech)
		return false;
	if (!svc_rqst_integrity_protected(rqstp))
		return false;
	if (cl->cl_cred.cr_raw_principal)
		return 0 == strcmp(cl->cl_cred.cr_raw_principal,
						cr->cr_raw_principal);
	if (!cr->cr_principal)
		return false;
	return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
}
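/*
 * nfsd4_mach_creds_match() above backs the SP4_MACH_CRED enforcement
 * set up in nfsd4_exchange_id(): once cl_mach_cred is set, the
 * operations listed in spo_must_enforce (BIND_CONN_TO_SESSION,
 * EXCHANGE_ID, CREATE_SESSION, DESTROY_SESSION, DESTROY_CLIENTID)
 * must arrive integrity-protected under the same GSS mechanism and
 * principal that established the client.
 */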
static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
{
	__be32 verf[2];

	/*
	 * This is opaque to client, so no need to byte-swap.  Use
	 * __force to keep sparse happy
	 */
	verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
	verf[1] = (__force __be32)nn->clverifier_counter++;
	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}

static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
	clp->cl_clientid.cl_boot = (u32)nn->boot_time;
	clp->cl_clientid.cl_id = nn->clientid_counter++;
	gen_confirm(clp, nn);
}

static struct nfs4_stid *
find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
{
	struct nfs4_stid *ret;

	ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id);
	if (!ret || !ret->sc_type)
		return NULL;
	return ret;
}

static struct nfs4_stid *
find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
		     unsigned short typemask, unsigned short ok_states)
{
	struct nfs4_stid *s;

	spin_lock(&cl->cl_lock);
	s = find_stateid_locked(cl, t);
	if (s != NULL) {
		if ((s->sc_status & ~ok_states) == 0 &&
		    (typemask & s->sc_type))
			refcount_inc(&s->sc_count);
		else
			s = NULL;
	}
	spin_unlock(&cl->cl_lock);
	return s;
}

static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
{
	struct nfsdfs_client *nc;

	nc = get_nfsdfs_client(inode);
	if (!nc)
		return NULL;
	return container_of(nc, struct nfs4_client, cl_nfsdfs);
}

static void seq_quote_mem(struct seq_file *m, char *data, int len)
{
	seq_puts(m, "\"");
	seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND,
		       "\"\\");
	seq_puts(m, "\"");
}

static const char *cb_state2str(int state)
{
	switch (state) {
	case NFSD4_CB_UP:
		return "UP";
	case NFSD4_CB_UNKNOWN:
		return "UNKNOWN";
	case NFSD4_CB_DOWN:
		return "DOWN";
	case NFSD4_CB_FAULT:
		return "FAULT";
	}
	return "UNDEFINED";
}

static int client_info_show(struct seq_file *m, void *v)
{
	struct inode *inode = file_inode(m->file);
	struct nfsd4_session *ses;
	struct nfs4_client *clp;
	u64 clid;

	clp = get_nfsdfs_clp(inode);
	if (!clp)
		return -ENXIO;
	memcpy(&clid, &clp->cl_clientid, sizeof(clid));
	seq_printf(m, "clientid: 0x%llx\n", clid);
	seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);

	if (clp->cl_state == NFSD4_COURTESY)
		seq_puts(m, "status: courtesy\n");
	else if (clp->cl_state == NFSD4_EXPIRABLE)
		seq_puts(m, "status: expirable\n");
	else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
		seq_puts(m, "status: confirmed\n");
	else
		seq_puts(m, "status: unconfirmed\n");
	seq_printf(m, "seconds from last renew: %lld\n",
		ktime_get_boottime_seconds() - clp->cl_time);
	seq_puts(m, "name: ");
	seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
	seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
	if (clp->cl_nii_domain.data) {
		seq_puts(m, "Implementation domain: ");
		seq_quote_mem(m, clp->cl_nii_domain.data,
					clp->cl_nii_domain.len);
		seq_puts(m, "\nImplementation name: ");
		seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
		seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
			clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
	}
	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
	seq_printf(m, "admin-revoked states: %d\n",
		   atomic_read(&clp->cl_admin_revoked));
	spin_lock(&clp->cl_lock);
	seq_printf(m, "session slots:");
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
	seq_printf(m, "\nsession target slots:");
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
		seq_printf(m, " %u", ses->se_target_maxslots);
	spin_unlock(&clp->cl_lock);
	seq_puts(m, "\n");

	drop_client(clp);

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(client_info);

static void *
states_start(struct seq_file *s, loff_t *pos)
	__acquires(&clp->cl_lock)
{
	struct nfs4_client *clp = s->private;
	unsigned long id = *pos;
	void *ret;

	spin_lock(&clp->cl_lock);
	ret = idr_get_next_ul(&clp->cl_stateids, &id);
	*pos = id;
	return ret;
}

static void *states_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct nfs4_client *clp = s->private;
	unsigned long id = *pos;
	void *ret;

	id = *pos;
	id++;
	ret = idr_get_next_ul(&clp->cl_stateids, &id);
	*pos = id;
	return ret;
}

static void states_stop(struct seq_file *s, void *v)
	__releases(&clp->cl_lock)
{
	struct nfs4_client *clp = s->private;

	spin_unlock(&clp->cl_lock);
}

static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
{
	seq_printf(s, "filename: \"%pD2\"", f->nf_file);
}

static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
{
	struct inode *inode = file_inode(f->nf_file);

	seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
		   MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
		   inode->i_ino);
}

static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
{
	seq_puts(s, "owner: ");
	seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
}

static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid)
{
	seq_printf(s, "0x%.8x", stid->si_generation);
	seq_printf(s, "%12phN", &stid->si_opaque);
}

static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
{
	struct nfs4_ol_stateid *ols;
	struct nfs4_file *nf;
	struct nfsd_file *file;
	struct nfs4_stateowner *oo;
	unsigned int access, deny;

	ols = openlockstateid(st);
	oo = ols->st_stateowner;
	nf = st->sc_file;

	seq_puts(s, "- ");
	nfs4_show_stateid(s, &st->sc_stateid);
	seq_puts(s, ": { type: open, ");

	access = bmap_to_share_mode(ols->st_access_bmap);
	deny   = bmap_to_share_mode(ols->st_deny_bmap);

	seq_printf(s, "access: %s%s, ",
		access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
		access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
	seq_printf(s, "deny: %s%s, ",
		deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
		deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");

	if (nf) {
		spin_lock(&nf->fi_lock);
		file = find_any_file_locked(nf);
		if (file) {
			nfs4_show_superblock(s, file);
			seq_puts(s, ", ");
			nfs4_show_fname(s, file);
			seq_puts(s, ", ");
		}
		spin_unlock(&nf->fi_lock);
	} else
		seq_puts(s, "closed, ");
	nfs4_show_owner(s, oo);
	if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
		seq_puts(s, ", admin-revoked");
	seq_puts(s, " }\n");
	return 0;
}
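/*
 * Each stateid becomes one line of the per-client "states" file.  A
 * line emitted by nfs4_show_open() above looks roughly like this
 * (all values illustrative only):
 *
 *	- 0x00000002 000000015f2ca3e80001: { type: open, access: rw,
 *	  deny: --, superblock: "08:02:12345", filename: "/export/f",
 *	  owner: "open id:\x00\x00..." }
 */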
static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
{
	struct nfs4_ol_stateid *ols;
	struct nfs4_file *nf;
	struct nfsd_file *file;
	struct nfs4_stateowner *oo;

	ols = openlockstateid(st);
	oo = ols->st_stateowner;
	nf = st->sc_file;

	seq_puts(s, "- ");
	nfs4_show_stateid(s, &st->sc_stateid);
	seq_puts(s, ": { type: lock, ");

	spin_lock(&nf->fi_lock);
	file = find_any_file_locked(nf);
	if (file) {
		/*
		 * Note: a lock stateid isn't really the same thing as a lock,
		 * it's the locking state held by one owner on a file, and
		 * there may be multiple (or no) lock ranges associated with
		 * it.  (The same is true of open stateids.)
		 */
		nfs4_show_superblock(s, file);
		/* XXX: open stateid? */
		seq_puts(s, ", ");
		nfs4_show_fname(s, file);
		seq_puts(s, ", ");
	}
	nfs4_show_owner(s, oo);
	if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
		seq_puts(s, ", admin-revoked");
	seq_puts(s, " }\n");
	spin_unlock(&nf->fi_lock);
	return 0;
}

static char *nfs4_show_deleg_type(u32 dl_type)
{
	switch (dl_type) {
	case OPEN_DELEGATE_READ:
		return "r";
	case OPEN_DELEGATE_WRITE:
		return "w";
	case OPEN_DELEGATE_READ_ATTRS_DELEG:
		return "ra";
	case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
		return "wa";
	}
	return "?";
}

static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
	struct nfs4_delegation *ds;
	struct nfs4_file *nf;
	struct nfsd_file *file;

	ds = delegstateid(st);
	nf = st->sc_file;

	seq_puts(s, "- ");
	nfs4_show_stateid(s, &st->sc_stateid);
	seq_puts(s, ": { type: deleg, ");

	seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));

	/* XXX: lease time, whether it's being recalled. */

	spin_lock(&nf->fi_lock);
	file = nf->fi_deleg_file;
	if (file) {
		seq_puts(s, ", ");
		nfs4_show_superblock(s, file);
		seq_puts(s, ", ");
		nfs4_show_fname(s, file);
	}
	spin_unlock(&nf->fi_lock);
	if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
		seq_puts(s, ", admin-revoked");
	seq_puts(s, " }\n");
	return 0;
}

static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
{
	struct nfs4_layout_stateid *ls;
	struct nfsd_file *file;

	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);

	seq_puts(s, "- ");
	nfs4_show_stateid(s, &st->sc_stateid);
	seq_puts(s, ": { type: layout");

	/* XXX: What else would be useful? */

	spin_lock(&ls->ls_stid.sc_file->fi_lock);
	file = ls->ls_file;
	if (file) {
		seq_puts(s, ", ");
		nfs4_show_superblock(s, file);
		seq_puts(s, ", ");
		nfs4_show_fname(s, file);
	}
	spin_unlock(&ls->ls_stid.sc_file->fi_lock);
	if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
		seq_puts(s, ", admin-revoked");
	seq_puts(s, " }\n");
	return 0;
}

static int states_show(struct seq_file *s, void *v)
{
	struct nfs4_stid *st = v;

	switch (st->sc_type) {
	case SC_TYPE_OPEN:
		return nfs4_show_open(s, st);
	case SC_TYPE_LOCK:
		return nfs4_show_lock(s, st);
	case SC_TYPE_DELEG:
		return nfs4_show_deleg(s, st);
	case SC_TYPE_LAYOUT:
		return nfs4_show_layout(s, st);
	default:
		return 0; /* XXX: or SEQ_SKIP? */
	}
	/* XXX: copy stateids? */
}

static struct seq_operations states_seq_ops = {
	.start = states_start,
	.next = states_next,
	.stop = states_stop,
	.show = states_show
};

static int client_states_open(struct inode *inode, struct file *file)
{
	struct seq_file *s;
	struct nfs4_client *clp;
	int ret;

	clp = get_nfsdfs_clp(inode);
	if (!clp)
		return -ENXIO;

	ret = seq_open(file, &states_seq_ops);
	if (ret) {
		drop_client(clp);
		return ret;
	}
	s = file->private_data;
	s->private = clp;
	return 0;
}

static int client_opens_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	struct nfs4_client *clp = m->private;

	/* XXX: alternatively, we could get/drop in seq start/stop */
	drop_client(clp);
	return seq_release(inode, file);
}

static const struct file_operations client_states_fops = {
	.open		= client_states_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= client_opens_release,
};
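/*
 * The "ctl" file created below accepts exactly one command: writing
 * the 7-byte string "expire\n" force-expires the client, e.g. (the
 * client directory name is illustrative):
 *
 *	# echo expire > /proc/fs/nfsd/clients/4/ctl
 *
 * Anything else is rejected with -EINVAL by client_ctl_write().
 */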
/*
 * Normally we refuse to destroy clients that are in use, but here the
 * administrator is telling us to just do it.  We also want to wait
 * so the caller has a guarantee that the client's locks are gone by
 * the time the write returns:
 */
static void force_expire_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	bool already_expired;

	trace_nfsd_clid_admin_expired(&clp->cl_clientid);

	spin_lock(&nn->client_lock);
	clp->cl_time = 0;
	spin_unlock(&nn->client_lock);

	wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
	spin_lock(&nn->client_lock);
	already_expired = list_empty(&clp->cl_lru);
	if (!already_expired)
		unhash_client_locked(clp);
	spin_unlock(&nn->client_lock);

	if (!already_expired)
		expire_client(clp);
	else
		wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
}

static ssize_t client_ctl_write(struct file *file, const char __user *buf,
				size_t size, loff_t *pos)
{
	char *data;
	struct nfs4_client *clp;

	data = simple_transaction_get(file, buf, size);
	if (IS_ERR(data))
		return PTR_ERR(data);
	if (size != 7 || 0 != memcmp(data, "expire\n", 7))
		return -EINVAL;
	clp = get_nfsdfs_clp(file_inode(file));
	if (!clp)
		return -ENXIO;
	force_expire_client(clp);
	drop_client(clp);
	return 7;
}

static const struct file_operations client_ctl_fops = {
	.write		= client_ctl_write,
	.release	= simple_transaction_release,
};

static const struct tree_descr client_files[] = {
	[0] = {"info", &client_info_fops, S_IRUSR},
	[1] = {"states", &client_states_fops, S_IRUSR},
	[2] = {"ctl", &client_ctl_fops, S_IWUSR},
	[3] = {""},
};

static int
nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
	trace_nfsd_cb_recall_any_done(cb, task);
	switch (task->tk_status) {
	case -NFS4ERR_DELAY:
		rpc_delay(task, 2 * HZ);
		return 0;
	default:
		return 1;
	}
}

static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
	struct nfs4_client *clp = cb->cb_clp;

	drop_client(clp);
}

static int
nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
	struct nfs4_cb_fattr *ncf =
			container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
	struct nfs4_delegation *dp =
			container_of(ncf, struct nfs4_delegation, dl_cb_fattr);

	trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task);
	ncf->ncf_cb_status = task->tk_status;
	switch (task->tk_status) {
	case -NFS4ERR_DELAY:
		rpc_delay(task, 2 * HZ);
		return 0;
	default:
		return 1;
	}
}

static void
nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
{
	struct nfs4_cb_fattr *ncf =
			container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
	struct nfs4_delegation *dp =
			container_of(ncf, struct nfs4_delegation, dl_cb_fattr);

	nfs4_put_stid(&dp->dl_stid);
}

static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
	.done		= nfsd4_cb_recall_any_done,
	.release	= nfsd4_cb_recall_any_release,
	.opcode		= OP_CB_RECALL_ANY,
};

static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
	.done		= nfsd4_cb_getattr_done,
	.release	= nfsd4_cb_getattr_release,
	.opcode		= OP_CB_GETATTR,
};

static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
{
	struct nfs4_delegation *dp =
			container_of(ncf, struct nfs4_delegation, dl_cb_fattr);

	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags))
		return;

	/* set to proper status when nfsd4_cb_getattr_done runs */
	ncf->ncf_cb_status = NFS4ERR_IO;
	/* ensure that wake_bit is done when RUNNING is cleared */
	set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags);

	refcount_inc(&dp->dl_stid.sc_count);
	nfsd4_run_cb(&ncf->ncf_getattr);
}
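/*
 * nfs4_cb_getattr() above issues at most one CB_GETATTR per delegation
 * at a time: NFSD4_CALLBACK_RUNNING acts as the gate, ncf_cb_status is
 * preset to NFS4ERR_IO so a callback that never completes reads as an
 * error, and the stateid reference taken here is dropped in
 * nfsd4_cb_getattr_release().
 */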
static struct nfs4_client *create_client(struct xdr_netobj name,
		struct svc_rqst *rqstp, nfs4_verifier *verf)
{
	struct nfs4_client *clp;
	struct sockaddr *sa = svc_addr(rqstp);
	int ret;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct dentry *dentries[ARRAY_SIZE(client_files)];

	clp = alloc_client(name, nn);
	if (clp == NULL)
		return NULL;

	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
	if (ret) {
		free_client(clp);
		return NULL;
	}
	gen_clid(clp, nn);
	kref_init(&clp->cl_nfsdfs.cl_ref);
	nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
	clp->cl_time = ktime_get_boottime_seconds();
	copy_verf(clp, verf);
	memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
	clp->cl_cb_session = NULL;
	clp->net = net;
	clp->cl_nfsd_dentry = nfsd_client_mkdir(
		nn, &clp->cl_nfsdfs,
		clp->cl_clientid.cl_id - nn->clientid_base,
		client_files, dentries);
	clp->cl_nfsd_info_dentry = dentries[0];
	if (!clp->cl_nfsd_dentry) {
		free_client(clp);
		return NULL;
	}
	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
	if (!clp->cl_ra) {
		free_client(clp);
		return NULL;
	}
	clp->cl_ra_time = 0;
	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
			NFSPROC4_CLNT_CB_RECALL_ANY);
	return clp;
}

static void
add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct nfs4_client *clp;

	while (*new) {
		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
		parent = *new;

		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&new_clp->cl_namenode, parent, new);
	rb_insert_color(&new_clp->cl_namenode, root);
}

static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
	int cmp;
	struct rb_node *node = root->rb_node;
	struct nfs4_client *clp;

	while (node) {
		clp = rb_entry(node, struct nfs4_client, cl_namenode);
		cmp = compare_blob(&clp->cl_name, name);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			return clp;
	}
	return NULL;
}

static void
add_to_unconfirmed(struct nfs4_client *clp)
{
	unsigned int idhashval;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
	renew_client_locked(clp);
}

static void
move_to_confirmed(struct nfs4_client *clp)
{
	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
	add_clp_to_name_tree(clp, &nn->conf_name_tree);
	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
	trace_nfsd_clid_confirmed(&clp->cl_clientid);
	renew_client_locked(clp);
}

static struct nfs4_client *
find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
{
	struct nfs4_client *clp;
	unsigned int idhashval = clientid_hashval(clid->cl_id);

	list_for_each_entry(clp, &tbl[idhashval], cl_idhash) {
		if (same_clid(&clp->cl_clientid, clid)) {
			if ((bool)clp->cl_minorversion != sessions)
				return NULL;
			renew_client_locked(clp);
			return clp;
		}
	}
	return NULL;
}

static struct nfs4_client *
find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
{
	struct list_head *tbl = nn->conf_id_hashtbl;

	lockdep_assert_held(&nn->client_lock);
	return find_client_in_id_table(tbl, clid, sessions);
}

static struct nfs4_client *
find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
{
	struct list_head *tbl = nn->unconf_id_hashtbl;

	lockdep_assert_held(&nn->client_lock);
	return find_client_in_id_table(tbl, clid, sessions);
}

static bool clp_used_exchangeid(struct nfs4_client *clp)
{
	return clp->cl_exchange_flags != 0;
}

static struct nfs4_client *
find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
{
	lockdep_assert_held(&nn->client_lock);
	return find_clp_in_name_tree(name, &nn->conf_name_tree);
}

static struct nfs4_client *
find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
{
	lockdep_assert_held(&nn->client_lock);
	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
}

static void
gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se,
	     struct svc_rqst *rqstp)
{
	struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
	struct sockaddr	*sa = svc_addr(rqstp);
	u32 scopeid = rpc_get_scope_id(sa);
	unsigned short expected_family;

	/* Currently, we only support tcp and tcp6 for the callback channel */
	if (se->se_callback_netid_len == 3 &&
	    !memcmp(se->se_callback_netid_val, "tcp", 3))
		expected_family = AF_INET;
	else if (se->se_callback_netid_len == 4 &&
		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
		expected_family = AF_INET6;
	else
		goto out_err;

	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
					    se->se_callback_addr_len,
					    (struct sockaddr *)&conn->cb_addr,
					    sizeof(conn->cb_addr));

	if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
		goto out_err;

	if (conn->cb_addr.ss_family == AF_INET6)
		((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;

	conn->cb_prog = se->se_callback_prog;
	conn->cb_ident = se->se_callback_ident;
	memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
	trace_nfsd_cb_args(clp, conn);
	return;
out_err:
	conn->cb_addr.ss_family = AF_UNSPEC;
	conn->cb_addrlen = 0;
	trace_nfsd_cb_nodelegs(clp);
	return;
}

/*
 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
 */
static void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
	struct xdr_buf *buf = resp->xdr->buf;
	struct nfsd4_slot *slot = resp->cstate.slot;
	unsigned int base;

	/*
	 * RFC 5661 Section 2.10.6.1.2:
	 *
	 * Any time SEQUENCE ... returns an error ... [t]he replier MUST NOT
	 * modify the reply cache entry for the slot whenever an error is
	 * returned from SEQUENCE ...
	 *
	 * Because nfsd4_store_cache_entry is called only by
	 * nfsd4_sequence_done(), nfsd4_store_cache_entry() is called only
	 * when a SEQUENCE operation was part of the COMPOUND.
	 * nfs41_check_op_ordering() ensures SEQUENCE is the first op.
	 */
	if (resp->opcnt == 1 && resp->cstate.status != nfs_ok)
		return;

	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
	slot->sl_opcnt = resp->opcnt;
	slot->sl_status = resp->cstate.status;
	free_svc_cred(&slot->sl_cred);
	copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);

	if (!(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
		slot->sl_flags &= ~NFSD4_SLOT_CACHED;
		return;
	}
	slot->sl_flags |= NFSD4_SLOT_CACHED;

	base = resp->cstate.data_offset;
	slot->sl_datalen = buf->len - base;
	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
		WARN(1, "%s: sessions DRC could not cache compound\n",
		     __func__);
	return;
}
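/*
 * Only the portion of the encoded reply that follows the SEQUENCE
 * result is cached: resp->cstate.data_offset marks the first byte
 * after SEQUENCE, so a replay can re-encode a fresh SEQUENCE result
 * and then splice in the saved bytes (see nfsd4_replay_cache_entry()
 * below).
 */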
/*
 * The sequence operation is not cached because we can use the slot and
 * session values.
 */
static __be32
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
			 struct nfsd4_sequence *seq)
{
	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
	struct nfsd4_slot *slot = resp->cstate.slot;
	struct xdr_stream *xdr = resp->xdr;
	__be32 *p;

	dprintk("--> %s slot %p\n", __func__, slot);

	/* Always encode the SEQUENCE response. */
	nfsd4_encode_operation(resp, &args->ops[0]);
	if (args->opcnt == 1)
		/* A solo SEQUENCE - nothing was cached */
		return args->ops[0].status;

	if (!(slot->sl_flags & NFSD4_SLOT_CACHED)) {
		/* We weren't asked to cache this. */
		struct nfsd4_op *op;

		op = &args->ops[resp->opcnt++];
		op->status = nfserr_retry_uncached_rep;
		nfsd4_encode_operation(resp, op);
		return op->status;
	}

	/* return reply from cache */
	p = xdr_reserve_space(xdr, slot->sl_datalen);
	if (!p) {
		WARN_ON_ONCE(1);
		return nfserr_serverfault;
	}
	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
	xdr_commit_encode(xdr);

	resp->opcnt = slot->sl_opcnt;
	return slot->sl_status;
}

/*
 * Set the exchange_id flags returned by the server.
 */
static void
nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
{
#ifdef CONFIG_NFSD_PNFS
	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
#else
	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
#endif

	/* Referrals are supported, Migration is not. */
	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;

	/* set the wire flags to return to client. */
	clid->flags = new->cl_exchange_flags;
}

static bool client_has_openowners(struct nfs4_client *clp)
{
	struct nfs4_openowner *oo;

	list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) {
		if (!list_empty(&oo->oo_owner.so_stateids))
			return true;
	}
	return false;
}

static bool client_has_state(struct nfs4_client *clp)
{
	return client_has_openowners(clp)
#ifdef CONFIG_NFSD_PNFS
		|| !list_empty(&clp->cl_lo_states)
#endif
		|| !list_empty(&clp->cl_delegations)
		|| !list_empty(&clp->cl_sessions)
		|| nfsd4_has_active_async_copies(clp);
}

static __be32 copy_impl_id(struct nfs4_client *clp,
		struct nfsd4_exchange_id *exid)
{
	if (!exid->nii_domain.data)
		return 0;
	xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
	if (!clp->cl_nii_domain.data)
		return nfserr_jukebox;
	xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
	if (!clp->cl_nii_name.data)
		return nfserr_jukebox;
	clp->cl_nii_time = exid->nii_time;
	return 0;
}

__be32
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		union nfsd4_op_u *u)
{
	struct nfsd4_exchange_id *exid = &u->exchange_id;
	struct nfs4_client *conf, *new;
	struct nfs4_client *unconf = NULL;
	__be32 status;
	char addr_str[INET6_ADDRSTRLEN];
	nfs4_verifier verf = exid->verifier;
	struct sockaddr *sa = svc_addr(rqstp);
	bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	rpc_ntop(sa, addr_str, sizeof(addr_str));
	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
		"ip_addr=%s flags %x, spa_how %u\n",
		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
		addr_str, exid->flags, exid->spa_how);

	exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s",
					   utsname()->sysname,
					   utsname()->release,
					   utsname()->version,
					   utsname()->machine);
	if (!exid->server_impl_name)
		return nfserr_jukebox;

	if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
		return nfserr_inval;

	new = create_client(exid->clname, rqstp, &verf);
	if (new == NULL)
		return nfserr_jukebox;
	status = copy_impl_id(new, exid);
	if (status)
		goto out_nolock;

	switch (exid->spa_how) {
	case SP4_MACH_CRED:
		exid->spo_must_enforce[0] = 0;
		exid->spo_must_enforce[1] = (
			1 << (OP_BIND_CONN_TO_SESSION - 32) |
			1 << (OP_EXCHANGE_ID - 32) |
			1 << (OP_CREATE_SESSION - 32) |
			1 << (OP_DESTROY_SESSION - 32) |
			1 << (OP_DESTROY_CLIENTID - 32));

		exid->spo_must_allow[0] &= (1 << (OP_CLOSE) |
					1 << (OP_OPEN_DOWNGRADE) |
					1 << (OP_LOCKU) |
					1 << (OP_DELEGRETURN));

		exid->spo_must_allow[1] &= (
					1 << (OP_TEST_STATEID - 32) |
					1 << (OP_FREE_STATEID - 32));
		if (!svc_rqst_integrity_protected(rqstp)) {
			status = nfserr_inval;
			goto out_nolock;
		}
		/*
		 * Sometimes userspace doesn't give us a principal.
		 * Which is a bug, really.  Anyway, we can't enforce
		 * MACH_CRED in that case, better to give up now:
		 */
		if (!new->cl_cred.cr_principal &&
					!new->cl_cred.cr_raw_principal) {
			status = nfserr_serverfault;
			goto out_nolock;
		}
		new->cl_mach_cred = true;
		break;
	case SP4_NONE:
		break;
	default:				/* checked by xdr code */
		WARN_ON_ONCE(1);
		fallthrough;
	case SP4_SSV:
		status = nfserr_encr_alg_unsupp;
		goto out_nolock;
	}

	/* Cases below refer to rfc 5661 section 18.35.4: */
	spin_lock(&nn->client_lock);
	conf = find_confirmed_client_by_name(&exid->clname, nn);
	if (conf) {
		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
		bool verfs_match = same_verf(&verf, &conf->cl_verifier);

		if (update) {
			if (!clp_used_exchangeid(conf)) { /* buggy client */
				status = nfserr_inval;
				goto out;
			}
			if (!nfsd4_mach_creds_match(conf, rqstp)) {
				status = nfserr_wrong_cred;
				goto out;
			}
			if (!creds_match) { /* case 9 */
				status = nfserr_perm;
				goto out;
			}
			if (!verfs_match) { /* case 8 */
				status = nfserr_not_same;
				goto out;
			}
			/* case 6 */
			exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
			trace_nfsd_clid_confirmed_r(conf);
			goto out_copy;
		}
		if (!creds_match) { /* case 3 */
			if (client_has_state(conf)) {
				status = nfserr_clid_inuse;
				trace_nfsd_clid_cred_mismatch(conf, rqstp);
				goto out;
			}
			goto out_new;
		}
		if (verfs_match) { /* case 2 */
			conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
			trace_nfsd_clid_confirmed_r(conf);
			goto out_copy;
		}
		/* case 5, client reboot */
		trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf);
		conf = NULL;
		goto out_new;
	}

	if (update) { /* case 7 */
		status = nfserr_noent;
		goto out;
	}

	unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
	if (unconf) /* case 4, possible retry or client restart */
		unhash_client_locked(unconf);

	/* case 1, new owner ID */
	trace_nfsd_clid_fresh(new);

out_new:
	if (conf) {
		status = mark_client_expired_locked(conf);
		if (status)
			goto out;
		trace_nfsd_clid_replaced(&conf->cl_clientid);
	}
	new->cl_minorversion = cstate->minorversion;
	new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
	new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];

	/* Contrived initial CREATE_SESSION response */
	new->cl_cs_slot.sl_status = nfserr_seq_misordered;

	add_to_unconfirmed(new);
	swap(new, conf);
out_copy:
	exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
	exid->clientid.cl_id = conf->cl_clientid.cl_id;
	exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
	nfsd4_set_ex_flags(conf, exid);
	exid->nii_domain.len = sizeof("kernel.org") - 1;
	exid->nii_domain.data = "kernel.org";
	/*
	 * Note that RFC 8881 places no length limit on
	 * nii_name, but this implementation permits no
	 * more than NFS4_OPAQUE_LIMIT bytes.
	 */
	exid->nii_name.len = strlen(exid->server_impl_name);
	if (exid->nii_name.len > NFS4_OPAQUE_LIMIT)
		exid->nii_name.len = NFS4_OPAQUE_LIMIT;
	exid->nii_name.data = exid->server_impl_name;

	/* just send zeros - the date is in nii_name */
	exid->nii_time.tv_sec = 0;
	exid->nii_time.tv_nsec = 0;

	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
		conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
	status = nfs_ok;

out:
	spin_unlock(&nn->client_lock);
out_nolock:
	if (new)
		expire_client(new);
	if (unconf) {
		trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
		expire_client(unconf);
	}
	return status;
}

void
nfsd4_exchange_id_release(union nfsd4_op_u *u)
{
	struct nfsd4_exchange_id *exid = &u->exchange_id;

	kfree(exid->server_impl_name);
}

static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
{
	/* The slot is in use, and no response has been sent. */
	if (flags & NFSD4_SLOT_INUSE) {
		if (seqid == slot_seqid)
			return nfserr_jukebox;
		else
			return nfserr_seq_misordered;
	}
	/* Note unsigned 32-bit arithmetic handles wraparound: */
	if (likely(seqid == slot_seqid + 1))
		return nfs_ok;
	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
		return nfs_ok;
	if (seqid == slot_seqid)
		return nfserr_replay_cache;
	return nfserr_seq_misordered;
}

/*
 * Cache the create session result into the create session single DRC
 * slot cache by saving the xdr structure. sl_seqid has been set.
 * Do this for solo or embedded create session operations.
 */
static void
nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
			   struct nfsd4_clid_slot *slot, __be32 nfserr)
{
	slot->sl_status = nfserr;
	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
}

static __be32
nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
			    struct nfsd4_clid_slot *slot)
{
	memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
	return slot->sl_status;
}

#define NFSD_MIN_REQ_HDR_SEQ_SZ	((\
			2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \
			1 +	/* MIN tag is length with zero, only length */ \
			3 +	/* version, opcount, opcode */ \
			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
				/* seqid, slotID, slotID, cache */ \
			4 ) * sizeof(__be32))

#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\
			2 +	/* verifier: AUTH_NULL, length 0 */\
			1 +	/* status */ \
			1 +	/* MIN tag is length with zero, only length */ \
			3 +	/* opcount, opcode, opstatus*/ \
			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
				/* seqid, slotID, slotID, slotID, status */ \
			5 ) * sizeof(__be32))

static __be32
check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
{
	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;

	if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
		return nfserr_toosmall;
	if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
		return nfserr_toosmall;
	ca->headerpadsz = 0;
	ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
	ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
	ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);

	return nfs_ok;
}
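/*
 * Worked example for the two minima above, assuming the usual
 * NFS4_MAX_SESSIONID_LEN of 16 bytes (XDR_QUADLEN(16) == 4 words):
 *
 *	NFSD_MIN_REQ_HDR_SEQ_SZ  = (4 + 1 + 3 + 4 + 4) * 4 = 64 bytes
 *	NFSD_MIN_RESP_HDR_SEQ_SZ = (2 + 1 + 1 + 3 + 4 + 5) * 4 = 64 bytes
 *
 * so check_forechannel_attrs() rejects any channel that cannot even
 * carry a COMPOUND containing a lone SEQUENCE operation.
 */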
/*
 * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
 * These are based on similar macros in linux/sunrpc/msg_prot.h.
 */
#define RPC_MAX_HEADER_WITH_AUTH_SYS \
	(RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))

#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
	(RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))

#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
				 RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
				 RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
				 sizeof(__be32))

static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
{
	ca->headerpadsz = 0;

	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
		return nfserr_toosmall;
	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
		return nfserr_toosmall;
	ca->maxresp_cached = 0;
	if (ca->maxops < 2)
		return nfserr_toosmall;

	return nfs_ok;
}

static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
{
	switch (cbs->flavor) {
	case RPC_AUTH_NULL:
	case RPC_AUTH_UNIX:
		return nfs_ok;
	default:
		/*
		 * GSS case: the spec doesn't allow us to return this
		 * error.  But it also doesn't allow us not to support
		 * GSS.
		 * I'd rather this fail hard than return some error the
		 * client might think it can already handle:
		 */
		return nfserr_encr_alg_unsupp;
	}
}

__be32 nfsd4_create_session(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{
	struct nfsd4_create_session *cr_ses = &u->create_session;
	struct sockaddr *sa = svc_addr(rqstp);
	struct nfs4_client *conf, *unconf;
	struct nfsd4_clid_slot *cs_slot;
	struct nfs4_client *old = NULL;
	struct nfsd4_session *new;
	struct nfsd4_conn *conn;
	__be32 status = 0;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
		return nfserr_inval;
	status = nfsd4_check_cb_sec(&cr_ses->cb_sec);
	if (status)
		return status;
	status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
	if (status)
		return status;
	status = check_backchannel_attrs(&cr_ses->back_channel);
	if (status)
		goto out_err;
	status = nfserr_jukebox;
	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
	if (!new)
		goto out_err;
	conn = alloc_conn_from_crses(rqstp, cr_ses);
	if (!conn)
		goto out_free_session;

	spin_lock(&nn->client_lock);

	/* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */
	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
	if (!conf && !unconf) {
		status = nfserr_stale_clientid;
		goto out_free_conn;
	}

	/* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
	if (conf) {
		cs_slot = &conf->cl_cs_slot;
		trace_nfsd_slot_seqid_conf(conf, cr_ses);
	} else {
		cs_slot = &unconf->cl_cs_slot;
		trace_nfsd_slot_seqid_unconf(unconf, cr_ses);
	}
	status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
	switch (status) {
	case nfs_ok:
		cs_slot->sl_seqid++;
		cr_ses->seqid = cs_slot->sl_seqid;
		break;
	case nfserr_replay_cache:
		status = nfsd4_replay_create_session(cr_ses, cs_slot);
		fallthrough;
	case nfserr_jukebox:
		/* The server MUST NOT cache NFS4ERR_DELAY */
		goto out_free_conn;
	default:
		goto out_cache_error;
	}

	/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
	if (conf) {
		status = nfserr_wrong_cred;
		if (!nfsd4_mach_creds_match(conf, rqstp))
			goto out_cache_error;
	} else {
		status = nfserr_clid_inuse;
		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
			trace_nfsd_clid_cred_mismatch(unconf, rqstp);
			goto out_cache_error;
		}
		status = nfserr_wrong_cred;
		if (!nfsd4_mach_creds_match(unconf, rqstp))
			goto out_cache_error;
		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
		if (old) {
			status = mark_client_expired_locked(old);
			if (status)
				goto out_expired_error;
			trace_nfsd_clid_replaced(&old->cl_clientid);
		}
		move_to_confirmed(unconf);
		conf = unconf;
	}

	/* RFC 8881 Section 18.36.4 Phase 4: Session creation. */
	status = nfs_ok;
	/* Persistent sessions are not supported */
	cr_ses->flags &= ~SESSION4_PERSIST;
	/* Upshifting from TCP to RDMA is not supported */
	cr_ses->flags &= ~SESSION4_RDMA;
	/* Report the correct number of backchannel slots */
	cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;

	init_session(rqstp, new, conf, cr_ses);
	nfsd4_get_session_locked(new);

	memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
	       NFS4_MAX_SESSIONID_LEN);

	/* cache solo and embedded create sessions under the client_lock */
	nfsd4_cache_create_session(cr_ses, cs_slot, status);
	spin_unlock(&nn->client_lock);
	if (conf == unconf)
		fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
	/* init connection and backchannel */
	nfsd4_init_conn(rqstp, conn, new);
	nfsd4_put_session(new);
	if (old)
		expire_client(old);
	return status;

out_expired_error:
	/*
	 * Revert the slot seq_nr change so the server will process
	 * the client's resend instead of returning a cached response.
	 */
	if (status == nfserr_jukebox) {
		cs_slot->sl_seqid--;
		cr_ses->seqid = cs_slot->sl_seqid;
		goto out_free_conn;
	}
out_cache_error:
	nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
	spin_unlock(&nn->client_lock);
	free_conn(conn);
out_free_session:
	__free_session(new);
out_err:
	return status;
}

static __be32 nfsd4_map_bcts_dir(u32 *dir)
{
	switch (*dir) {
	case NFS4_CDFC4_FORE:
	case NFS4_CDFC4_BACK:
		return nfs_ok;
	case NFS4_CDFC4_FORE_OR_BOTH:
	case NFS4_CDFC4_BACK_OR_BOTH:
		*dir = NFS4_CDFC4_BOTH;
		return nfs_ok;
	}
	return nfserr_inval;
}

__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		union nfsd4_op_u *u)
{
	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
	struct nfsd4_session *session = cstate->session;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
	__be32 status;

	status = nfsd4_check_cb_sec(&bc->bc_cb_sec);
	if (status)
		return status;
	spin_lock(&nn->client_lock);
	session->se_cb_prog = bc->bc_cb_program;
	session->se_cb_sec = bc->bc_cb_sec;
	spin_unlock(&nn->client_lock);

	nfsd4_probe_callback(session->se_client);

	return nfs_ok;
}

static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt,
					    struct nfsd4_session *s)
{
	struct nfsd4_conn *c;

	list_for_each_entry(c, &s->se_conns, cn_persession) {
		if (c->cn_xprt == xpt) {
			return c;
		}
	}
	return NULL;
}
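/*
 * nfsd4_match_existing_connection() below implements the last
 * paragraph of RFC 5661 Section 18.34.3.  Summarizing the outcomes
 * for an existing connection with flags cn_flags:
 *
 *	requested dir		matches when cn_flags is
 *	CDFC4_FORE		FORE (exact match only)
 *	CDFC4_BACK		BACK (exact match only)
 *	CDFC4_FORE_OR_BOTH	anything but BACK
 *	CDFC4_BACK_OR_BOTH	anything but FORE
 *
 * Any other combination is nfserr_inval, and a missing connection is
 * nfserr_noent.
 */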
static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst,
		struct nfsd4_session *session, u32 req,
		struct nfsd4_conn **conn)
{
	struct nfs4_client *clp = session->se_client;
	struct svc_xprt *xpt = rqst->rq_xprt;
	struct nfsd4_conn *c;
	__be32 status;

	/* Following the last paragraph of RFC 5661 Section 18.34.3: */
	spin_lock(&clp->cl_lock);
	c = __nfsd4_find_conn(xpt, session);
	if (!c)
		status = nfserr_noent;
	else if (req == c->cn_flags)
		status = nfs_ok;
	else if (req == NFS4_CDFC4_FORE_OR_BOTH &&
				c->cn_flags != NFS4_CDFC4_BACK)
		status = nfs_ok;
	else if (req == NFS4_CDFC4_BACK_OR_BOTH &&
				c->cn_flags != NFS4_CDFC4_FORE)
		status = nfs_ok;
	else
		status = nfserr_inval;
	spin_unlock(&clp->cl_lock);
	if (status == nfs_ok && conn)
		*conn = c;
	return status;
}

__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     union nfsd4_op_u *u)
{
	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
	__be32 status;
	struct nfsd4_conn *conn;
	struct nfsd4_session *session;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (!nfsd4_last_compound_op(rqstp))
		return nfserr_not_only_op;
	spin_lock(&nn->client_lock);
	session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
	spin_unlock(&nn->client_lock);
	if (!session)
		goto out_no_session;
	status = nfserr_wrong_cred;
	if (!nfsd4_mach_creds_match(session->se_client, rqstp))
		goto out;
	status = nfsd4_match_existing_connection(rqstp, session,
			bcts->dir, &conn);
	if (status == nfs_ok) {
		if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH ||
				bcts->dir == NFS4_CDFC4_BACK)
			conn->cn_flags |= NFS4_CDFC4_BACK;
		nfsd4_probe_callback(session->se_client);
		goto out;
	}
	if (status == nfserr_inval)
		goto out;
	status = nfsd4_map_bcts_dir(&bcts->dir);
	if (status)
		goto out;
	conn = alloc_conn(rqstp, bcts->dir);
	status = nfserr_jukebox;
	if (!conn)
		goto out;
	nfsd4_init_conn(rqstp, conn, session);
	status = nfs_ok;
out:
	nfsd4_put_session(session);
out_no_session:
	return status;
}

static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate,
				      struct nfs4_sessionid *sid)
{
	if (!cstate->session)
		return false;
	return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid));
}

__be32
nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
		union nfsd4_op_u *u)
{
	struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid;
	struct nfsd4_session *ses;
	__be32 status;
	int ref_held_by_me = 0;
	struct net *net = SVC_NET(r);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	status = nfserr_not_only_op;
	if (nfsd4_compound_in_session(cstate, sessionid)) {
		if (!nfsd4_last_compound_op(r))
			goto out;
		ref_held_by_me++;
	}
	dump_sessionid(__func__, sessionid);
	spin_lock(&nn->client_lock);
	ses = find_in_sessionid_hashtbl(sessionid, net, &status);
	if (!ses)
		goto out_client_lock;
	status = nfserr_wrong_cred;
	if (!nfsd4_mach_creds_match(ses->se_client, r))
		goto out_put_session;
	status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
	if (status)
		goto out_put_session;
	unhash_session(ses);

	spin_unlock(&nn->client_lock);

	nfsd4_probe_callback_sync(ses->se_client);

	spin_lock(&nn->client_lock);
	status = nfs_ok;
out_put_session:
	nfsd4_put_session_locked(ses);
out_client_lock:
	spin_unlock(&nn->client_lock);
out:
	return status;
}

static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new,
					struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd4_conn *c;
	__be32 status = nfs_ok;
	int ret;

	spin_lock(&clp->cl_lock);
	c = __nfsd4_find_conn(new->cn_xprt, ses);
	if (c)
		goto out_free;
	status = nfserr_conn_not_bound_to_session;
	if (clp->cl_mach_cred)
		goto out_free;
	__nfsd4_hash_conn(new, ses);
	spin_unlock(&clp->cl_lock);
	ret = nfsd4_register_conn(new);
	if (ret)
		/* oops; xprt is already down: */
		nfsd4_conn_lost(&new->cn_xpt_user);
	return nfs_ok;
out_free:
	spin_unlock(&clp->cl_lock);
	free_conn(new);
	return status;
}

static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp,
				       struct nfsd4_session *session)
{
	struct nfsd4_compoundargs *args = rqstp->rq_argp;

	return args->opcnt > session->se_fchannel.maxops;
}
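/*
 * nfsd4_session_too_many_ops() above and nfsd4_request_too_big() below
 * are cheap sanity checks run early in nfsd4_sequence(): the COMPOUND
 * must not carry more operations than the fore channel's negotiated
 * maxops (clamped to NFSD_MAX_OPS_PER_COMPOUND at CREATE_SESSION
 * time), and the encoded request must fit within the negotiated
 * maxreq_sz.
 */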
static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
				  struct nfsd4_session *session)
{
	struct xdr_buf *xb = &rqstp->rq_arg;

	return xb->len > session->se_fchannel.maxreq_sz;
}

static bool replay_matches_cache(struct svc_rqst *rqstp,
		 struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
{
	struct nfsd4_compoundargs *argp = rqstp->rq_argp;

	if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
	    (bool)seq->cachethis)
		return false;
	/*
	 * If there's an error then the reply can have fewer ops than
	 * the call.
	 */
	if (slot->sl_opcnt < argp->opcnt && !slot->sl_status)
		return false;
	/*
	 * But if we cached a reply with *more* ops than the call you're
	 * sending us now, then this new call is clearly not really a
	 * replay of the old one:
	 */
	if (slot->sl_opcnt > argp->opcnt)
		return false;
	/* This is the only check explicitly called by spec: */
	if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
		return false;
	/*
	 * There may be more comparisons we could actually do, but the
	 * spec doesn't require us to catch every case where the calls
	 * don't match (that would require caching the call as well as
	 * the reply), so we don't bother.
	 */
	return true;
}

/*
 * Note that the response is constructed here both for the case
 * of a new SEQUENCE request and for a replayed SEQUENCE request.
 * We do not cache SEQUENCE responses as SEQUENCE is idempotent.
 */
static void nfsd4_construct_sequence_response(struct nfsd4_session *session,
					      struct nfsd4_sequence *seq)
{
	struct nfs4_client *clp = session->se_client;

	seq->maxslots_response = max(session->se_target_maxslots,
				     seq->maxslots);
	seq->target_maxslots = session->se_target_maxslots;

	switch (clp->cl_cb_state) {
	case NFSD4_CB_DOWN:
		seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
		break;
	case NFSD4_CB_FAULT:
		seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
		break;
	default:
		seq->status_flags = 0;
	}
	if (!list_empty(&clp->cl_revoked))
		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
	if (atomic_read(&clp->cl_admin_revoked))
		seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
}

__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	       union nfsd4_op_u *u)
{
	struct nfsd4_sequence *seq = &u->sequence;
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct xdr_stream *xdr = resp->xdr;
	struct nfsd4_session *session;
	struct nfs4_client *clp;
	struct nfsd4_slot *slot;
	struct nfsd4_conn *conn;
	__be32 status;
	int buflen;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (resp->opcnt != 1)
		return nfserr_sequence_pos;

	/*
	 * Will be either used or freed by nfsd4_sequence_check_conn
	 * below.
	 */
__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	       union nfsd4_op_u *u)
{
	struct nfsd4_sequence *seq = &u->sequence;
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct xdr_stream *xdr = resp->xdr;
	struct nfsd4_session *session;
	struct nfs4_client *clp;
	struct nfsd4_slot *slot;
	struct nfsd4_conn *conn;
	__be32 status;
	int buflen;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (resp->opcnt != 1)
		return nfserr_sequence_pos;

	/*
	 * Will be either used or freed by nfsd4_sequence_check_conn
	 * below.
	 */
	conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
	if (!conn)
		return nfserr_jukebox;

	spin_lock(&nn->client_lock);
	session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
	if (!session)
		goto out_no_session;
	clp = session->se_client;

	status = nfserr_too_many_ops;
	if (nfsd4_session_too_many_ops(rqstp, session))
		goto out_put_session;

	status = nfserr_req_too_big;
	if (nfsd4_request_too_big(rqstp, session))
		goto out_put_session;

	status = nfserr_badslot;
	if (seq->slotid >= session->se_fchannel.maxreqs)
		goto out_put_session;

	slot = xa_load(&session->se_slots, seq->slotid);
	dprintk("%s: slotid %d\n", __func__, seq->slotid);

	trace_nfsd_slot_seqid_sequence(clp, seq, slot);

	nfsd4_construct_sequence_response(session, seq);

	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
	if (status == nfserr_replay_cache) {
		status = nfserr_seq_misordered;
		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
			goto out_put_session;
		status = nfserr_seq_false_retry;
		if (!replay_matches_cache(rqstp, seq, slot))
			goto out_put_session;
		cstate->slot = slot;
		cstate->session = session;
		cstate->clp = clp;
		/*
		 * Return the cached reply status and set cstate->status
		 * for nfsd4_proc_compound processing
		 */
		status = nfsd4_replay_cache_entry(resp, seq);
		cstate->status = nfserr_replay_cache;
		goto out;
	}
	if (status)
		goto out_put_session;

	status = nfsd4_sequence_check_conn(conn, session);
	conn = NULL;
	if (status)
		goto out_put_session;

	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
	    slot->sl_generation == session->se_slot_gen &&
	    seq->maxslots <= session->se_target_maxslots)
		/* Client acknowledged our reduced maxreqs */
		free_session_slots(session, session->se_target_maxslots);

	buflen = (seq->cachethis) ?
			session->se_fchannel.maxresp_cached :
			session->se_fchannel.maxresp_sz;
	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
				    nfserr_rep_too_big;
	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
		goto out_put_session;
	svc_reserve_auth(rqstp, buflen);
	status = nfs_ok;

	/* Success! accept new slot seqid */
	slot->sl_seqid = seq->seqid;
	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
	slot->sl_flags |= NFSD4_SLOT_INUSE;
	slot->sl_generation = session->se_slot_gen;
	if (seq->cachethis)
		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
	else
		slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;

	cstate->slot = slot;
	cstate->session = session;
	cstate->clp = clp;

	/*
	 * If the client ever uses the highest available slot,
	 * gently try to allocate another 20%. This allows
	 * fairly quick growth without grossly over-shooting what
	 * the client might use.
	 */
	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
		int s = session->se_fchannel.maxreqs;
		int cnt = DIV_ROUND_UP(s, 5);
		void *prev_slot;

		do {
			/*
			 * GFP_NOWAIT both allows allocation under a
			 * spinlock, and only succeeds if there is
			 * plenty of memory.
			 */
			slot = nfsd4_alloc_slot(&session->se_fchannel, s,
						GFP_NOWAIT);
			prev_slot = xa_load(&session->se_slots, s);
			if (xa_is_value(prev_slot) && slot) {
				slot->sl_seqid = xa_to_value(prev_slot);
				slot->sl_flags |= NFSD4_SLOT_REUSED;
			}
			if (slot &&
			    !xa_is_err(xa_store(&session->se_slots, s, slot,
						GFP_NOWAIT))) {
				s += 1;
				session->se_fchannel.maxreqs = s;
				atomic_add(s - session->se_target_maxslots,
					   &nfsd_total_target_slots);
				session->se_target_maxslots = s;
			} else {
				kfree(slot);
				slot = NULL;
			}
		} while (slot && --cnt > 0);
	}

out:
	trace_nfsd_seq4_status(rqstp, seq);
out_no_session:
	if (conn)
		free_conn(conn);
	spin_unlock(&nn->client_lock);
	return status;
out_put_session:
	nfsd4_put_session_locked(session);
	goto out_no_session;
}

void
nfsd4_sequence_done(struct nfsd4_compoundres *resp)
{
	struct nfsd4_compound_state *cs = &resp->cstate;

	if (nfsd4_has_session(cs)) {
		if (cs->status != nfserr_replay_cache) {
			nfsd4_store_cache_entry(resp);
			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
		}
		/* Drop session reference that was taken in nfsd4_sequence() */
		nfsd4_put_session(cs->session);
	} else if (cs->clp)
		put_client_renew(cs->clp);
}
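/*
 * DESTROY_CLIENTID: fails with nfserr_clientid_busy while the client
 * still holds state, and with nfserr_wrong_cred unless the request
 * carries the client's machine credential.
 */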
__be32
nfsd4_destroy_clientid(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate,
		union nfsd4_op_u *u)
{
	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
	struct nfs4_client *conf, *unconf;
	struct nfs4_client *clp = NULL;
	__be32 status = 0;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	spin_lock(&nn->client_lock);
	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
	conf = find_confirmed_client(&dc->clientid, true, nn);
	WARN_ON_ONCE(conf && unconf);

	if (conf) {
		if (client_has_state(conf)) {
			status = nfserr_clientid_busy;
			goto out;
		}
		status = mark_client_expired_locked(conf);
		if (status)
			goto out;
		clp = conf;
	} else if (unconf)
		clp = unconf;
	else {
		status = nfserr_stale_clientid;
		goto out;
	}
	if (!nfsd4_mach_creds_match(clp, rqstp)) {
		clp = NULL;
		status = nfserr_wrong_cred;
		goto out;
	}
	trace_nfsd_clid_destroyed(&clp->cl_clientid);
	unhash_client_locked(clp);
out:
	spin_unlock(&nn->client_lock);
	if (clp)
		expire_client(clp);
	return status;
}

__be32
nfsd4_reclaim_complete(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{
	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
	struct nfs4_client *clp = cstate->clp;
	__be32 status = 0;

	if (rc->rca_one_fs) {
		if (!cstate->current_fh.fh_dentry)
			return nfserr_nofilehandle;
		/*
		 * We don't take advantage of the rca_one_fs case.
		 * That's OK, it's optional, we can safely ignore it.
		 */
		return nfs_ok;
	}

	status = nfserr_complete_already;
	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
		goto out;

	status = nfserr_stale_clientid;
	if (is_client_expired(clp))
		/*
		 * The following error isn't really legal.
		 * But we only get here if the client just explicitly
		 * destroyed itself. Surely it no longer cares what
		 * error it gets back on an operation for the dead
		 * client.
		 */
		goto out;

	status = nfs_ok;
	trace_nfsd_clid_reclaim_complete(&clp->cl_clientid);
	nfsd4_client_record_create(clp);
	inc_reclaim_complete(clp);
out:
	return status;
}

__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  union nfsd4_op_u *u)
{
	struct nfsd4_setclientid *setclid = &u->setclientid;
	struct xdr_netobj clname = setclid->se_name;
	nfs4_verifier clverifier = setclid->se_verf;
	struct nfs4_client *conf, *new;
	struct nfs4_client *unconf = NULL;
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	new = create_client(clname, rqstp, &clverifier);
	if (new == NULL)
		return nfserr_jukebox;
	spin_lock(&nn->client_lock);
	conf = find_confirmed_client_by_name(&clname, nn);
	if (conf && client_has_state(conf)) {
		status = nfserr_clid_inuse;
		if (clp_used_exchangeid(conf))
			goto out;
		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
			trace_nfsd_clid_cred_mismatch(conf, rqstp);
			goto out;
		}
	}
	unconf = find_unconfirmed_client_by_name(&clname, nn);
	if (unconf)
		unhash_client_locked(unconf);
	if (conf) {
		if (same_verf(&conf->cl_verifier, &clverifier)) {
			copy_clid(new, conf);
			gen_confirm(new, nn);
		} else
			trace_nfsd_clid_verf_mismatch(conf, rqstp,
						      &clverifier);
	} else
		trace_nfsd_clid_fresh(new);
	new->cl_minorversion = 0;
	gen_callback(new, setclid, rqstp);
	add_to_unconfirmed(new);
	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
	memcpy(setclid->se_confirm.data, new->cl_confirm.data,
	       sizeof(setclid->se_confirm.data));
	new = NULL;
	status = nfs_ok;
out:
	spin_unlock(&nn->client_lock);
	if (new)
		free_client(new);
	if (unconf) {
		trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
		expire_client(unconf);
	}
	return status;
}
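/*
 * SETCLIENTID_CONFIRM confirms a clientid returned by a previous
 * SETCLIENTID. Depending on which of the confirmed and unconfirmed
 * records match the verifier and credential, this either completes
 * a new client record, replaces an old client with the same name,
 * or is recognized as a retransmission.
 */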
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
			struct nfsd4_compound_state *cstate,
			union nfsd4_op_u *u)
{
	struct nfsd4_setclientid_confirm *setclientid_confirm =
			&u->setclientid_confirm;
	struct nfs4_client *conf, *unconf;
	struct nfs4_client *old = NULL;
	nfs4_verifier confirm = setclientid_confirm->sc_confirm;
	clientid_t *clid = &setclientid_confirm->sc_clientid;
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	if (STALE_CLIENTID(clid, nn))
		return nfserr_stale_clientid;

	spin_lock(&nn->client_lock);
	conf = find_confirmed_client(clid, false, nn);
	unconf = find_unconfirmed_client(clid, false, nn);
	/*
	 * We try hard to give out unique clientid's, so if we get an
	 * attempt to confirm the same clientid with a different cred,
	 * the client may be buggy; this should never happen.
	 *
	 * Nevertheless, RFC 7530 recommends INUSE for this case:
	 */
	status = nfserr_clid_inuse;
	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
		trace_nfsd_clid_cred_mismatch(unconf, rqstp);
		goto out;
	}
	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
		trace_nfsd_clid_cred_mismatch(conf, rqstp);
		goto out;
	}
	if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
		if (conf && same_verf(&confirm, &conf->cl_confirm)) {
			status = nfs_ok;
		} else
			status = nfserr_stale_clientid;
		goto out;
	}
	status = nfs_ok;
	if (conf) {
		if (get_client_locked(conf) == nfs_ok) {
			old = unconf;
			unhash_client_locked(old);
			nfsd4_change_callback(conf, &unconf->cl_cb_conn);
		} else {
			conf = NULL;
		}
	}

	if (!conf) {
		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
		if (old) {
			status = nfserr_clid_inuse;
			if (client_has_state(old) &&
			    !same_creds(&unconf->cl_cred, &old->cl_cred)) {
				old = NULL;
				goto out;
			}
			status = mark_client_expired_locked(old);
			if (status) {
				old = NULL;
				goto out;
			}
			trace_nfsd_clid_replaced(&old->cl_clientid);
		}
		status = get_client_locked(unconf);
		if (status != nfs_ok) {
			old = NULL;
			goto out;
		}
		move_to_confirmed(unconf);
		conf = unconf;
	}

	spin_unlock(&nn->client_lock);
	if (conf == unconf)
		fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
	nfsd4_probe_callback(conf);
	spin_lock(&nn->client_lock);
	put_client_renew_locked(conf);
out:
	spin_unlock(&nn->client_lock);
	if (old)
		expire_client(old);
	return status;
}

static struct nfs4_file *nfsd4_alloc_file(void)
{
	return kmem_cache_alloc(file_slab, GFP_KERNEL);
}

/* OPEN Share state helper functions */

static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
{
	refcount_set(&fp->fi_ref, 1);
	spin_lock_init(&fp->fi_lock);
	INIT_LIST_HEAD(&fp->fi_stateids);
	INIT_LIST_HEAD(&fp->fi_delegations);
	INIT_LIST_HEAD(&fp->fi_clnt_odstate);
	fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
	fp->fi_deleg_file = NULL;
	fp->fi_rdeleg_file = NULL;
	fp->fi_had_conflict = false;
	fp->fi_share_deny = 0;
	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
	memset(fp->fi_access, 0, sizeof(fp->fi_access));
	fp->fi_aliased = false;
	fp->fi_inode = d_inode(fh->fh_dentry);
#ifdef CONFIG_NFSD_PNFS
	INIT_LIST_HEAD(&fp->fi_lo_states);
	atomic_set(&fp->fi_lo_recalls, 0);
#endif
}

void
nfsd4_free_slabs(void)
{
	kmem_cache_destroy(client_slab);
	kmem_cache_destroy(openowner_slab);
	kmem_cache_destroy(lockowner_slab);
	kmem_cache_destroy(file_slab);
	kmem_cache_destroy(stateid_slab);
	kmem_cache_destroy(deleg_slab);
	kmem_cache_destroy(odstate_slab);
}

int
nfsd4_init_slabs(void)
{
	client_slab = KMEM_CACHE(nfs4_client, 0);
	if (client_slab == NULL)
		goto out;
	openowner_slab = KMEM_CACHE(nfs4_openowner, 0);
	if (openowner_slab == NULL)
		goto out_free_client_slab;
	lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0);
	if (lockowner_slab == NULL)
		goto out_free_openowner_slab;
	file_slab = KMEM_CACHE(nfs4_file, 0);
	if (file_slab == NULL)
		goto out_free_lockowner_slab;
	stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0);
	if (stateid_slab == NULL)
		goto out_free_file_slab;
	deleg_slab = KMEM_CACHE(nfs4_delegation, 0);
	if (deleg_slab == NULL)
		goto out_free_stateid_slab;
	odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0);
	if (odstate_slab == NULL)
		goto out_free_deleg_slab;
	return 0;

out_free_deleg_slab:
	kmem_cache_destroy(deleg_slab);
out_free_stateid_slab:
	kmem_cache_destroy(stateid_slab);
out_free_file_slab:
	kmem_cache_destroy(file_slab);
out_free_lockowner_slab:
	kmem_cache_destroy(lockowner_slab);
out_free_openowner_slab:
	kmem_cache_destroy(openowner_slab);
out_free_client_slab:
	kmem_cache_destroy(client_slab);
out:
	return -ENOMEM;
}
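/*
 * The state shrinker only reports pressure here; actual reaping of
 * courtesy clients and delegations is deferred to the laundromat
 * workqueue, so the ->scan callback below always returns SHRINK_STOP.
 */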
static unsigned long
nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct nfsd_net *nn = shrink->private_data;
	long count;

	count = atomic_read(&nn->nfsd_courtesy_clients);
	if (!count)
		count = atomic_long_read(&num_delegations);
	if (count)
		queue_work(laundry_wq, &nn->nfsd_shrinker_work);
	return (unsigned long)count;
}

static unsigned long
nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	return SHRINK_STOP;
}

void
nfsd4_init_leases_net(struct nfsd_net *nn)
{
	struct sysinfo si;
	u64 max_clients;

	nn->nfsd4_lease = 90;	/* default lease time */
	nn->nfsd4_grace = 90;
	nn->somebody_reclaimed = false;
	nn->track_reclaim_completes = false;
	nn->clverifier_counter = get_random_u32();
	nn->clientid_base = get_random_u32();
	nn->clientid_counter = nn->clientid_base + 1;
	nn->s2s_cp_cl_id = nn->clientid_counter++;

	atomic_set(&nn->nfs4_client_count, 0);
	si_meminfo(&si);
	max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
	max_clients *= NFS4_CLIENTS_PER_GB;
	nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);

	atomic_set(&nn->nfsd_courtesy_clients, 0);
}

enum rp_lock {
	RP_UNLOCKED,
	RP_LOCKED,
	RP_UNHASHED,
};

static void init_nfs4_replay(struct nfs4_replay *rp)
{
	rp->rp_status = nfserr_serverfault;
	rp->rp_buflen = 0;
	rp->rp_buf = rp->rp_ibuf;
	rp->rp_locked = RP_UNLOCKED;
}

static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
				      struct nfs4_stateowner *so)
{
	if (!nfsd4_has_session(cstate)) {
		wait_var_event(&so->so_replay.rp_locked,
			       cmpxchg(&so->so_replay.rp_locked,
				       RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
		if (so->so_replay.rp_locked == RP_UNHASHED)
			return -EAGAIN;
		cstate->replay_owner = nfs4_get_stateowner(so);
	}
	return 0;
}

void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
{
	struct nfs4_stateowner *so = cstate->replay_owner;

	if (so != NULL) {
		cstate->replay_owner = NULL;
		store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
		nfs4_put_stateowner(so);
	}
}

static inline void *alloc_stateowner(struct kmem_cache *slab,
				     struct xdr_netobj *owner,
				     struct nfs4_client *clp)
{
	struct nfs4_stateowner *sop;

	sop = kmem_cache_alloc(slab, GFP_KERNEL);
	if (!sop)
		return NULL;

	xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
	if (!sop->so_owner.data) {
		kmem_cache_free(slab, sop);
		return NULL;
	}

	INIT_LIST_HEAD(&sop->so_stateids);
	sop->so_client = clp;
	init_nfs4_replay(&sop->so_replay);
	atomic_set(&sop->so_count, 1);
	return sop;
}

static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp,
			   unsigned int strhashval)
{
	lockdep_assert_held(&clp->cl_lock);

	list_add(&oo->oo_owner.so_strhash,
		 &clp->cl_ownerstr_hashtbl[strhashval]);
	list_add(&oo->oo_perclient, &clp->cl_openowners);
}

static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
{
	unhash_openowner_locked(openowner(so));
}

static void nfs4_free_openowner(struct nfs4_stateowner *so)
{
	struct nfs4_openowner *oo = openowner(so);

	kmem_cache_free(openowner_slab, oo);
}

static const struct nfs4_stateowner_operations openowner_ops = {
	.so_unhash =	nfs4_unhash_openowner,
	.so_free =	nfs4_free_openowner,
};
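/*
 * Search the file's stateid list for an OPEN stateid that belongs to
 * this openowner. Lock stateids are skipped. The caller must already
 * hold ->fi_lock, and a reference is taken on any stateid returned.
 */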
static struct nfs4_ol_stateid *
nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
{
	struct nfs4_ol_stateid *local, *ret = NULL;
	struct nfs4_openowner *oo = open->op_openowner;

	lockdep_assert_held(&fp->fi_lock);
	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
		/* ignore lock owners */
		if (local->st_stateowner->so_is_open_owner == 0)
			continue;
		if (local->st_stateowner != &oo->oo_owner)
			continue;
		if (local->st_stid.sc_type == SC_TYPE_OPEN &&
		    !local->st_stid.sc_status) {
			ret = local;
			refcount_inc(&ret->st_stid.sc_count);
			break;
		}
	}
	return ret;
}

static void
nfsd4_drop_revoked_stid(struct nfs4_stid *s)
	__releases(&s->sc_client->cl_lock)
{
	struct nfs4_client *cl = s->sc_client;
	LIST_HEAD(reaplist);
	struct nfs4_ol_stateid *stp;
	struct nfs4_delegation *dp;
	bool unhashed;

	switch (s->sc_type) {
	case SC_TYPE_OPEN:
		stp = openlockstateid(s);
		if (unhash_open_stateid(stp, &reaplist))
			put_ol_stateid_locked(stp, &reaplist);
		spin_unlock(&cl->cl_lock);
		free_ol_stateid_reaplist(&reaplist);
		break;
	case SC_TYPE_LOCK:
		stp = openlockstateid(s);
		unhashed = unhash_lock_stateid(stp);
		spin_unlock(&cl->cl_lock);
		if (unhashed)
			nfs4_put_stid(s);
		break;
	case SC_TYPE_DELEG:
		dp = delegstateid(s);
		list_del_init(&dp->dl_recall_lru);
		spin_unlock(&cl->cl_lock);
		nfs4_put_stid(s);
		break;
	default:
		spin_unlock(&cl->cl_lock);
	}
}

static void nfsd40_drop_revoked_stid(struct nfs4_client *cl,
				     stateid_t *stid)
{
	/* NFSv4.0 has no way for the client to tell the server
	 * that it can forget an admin-revoked stateid.
	 * So we keep it around until the first time that the
	 * client uses it, and drop it the first time
	 * nfserr_admin_revoked is returned.
	 * For v4.1 and later we wait until explicitly told
	 * to free the stateid.
	 */
	if (cl->cl_minorversion == 0) {
		struct nfs4_stid *st;

		spin_lock(&cl->cl_lock);
		st = find_stateid_locked(cl, stid);
		if (st)
			nfsd4_drop_revoked_stid(st);
		else
			spin_unlock(&cl->cl_lock);
	}
}

static __be32 nfsd4_verify_open_stid(struct nfs4_stid *s)
{
	__be32 ret = nfs_ok;

	if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
		ret = nfserr_admin_revoked;
	else if (s->sc_status & SC_STATUS_REVOKED)
		ret = nfserr_deleg_revoked;
	else if (s->sc_status & SC_STATUS_CLOSED)
		ret = nfserr_bad_stateid;
	return ret;
}

/* Lock the stateid st_mutex, and deal with races with CLOSE */
static __be32 nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
{
	__be32 ret;

	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
	ret = nfsd4_verify_open_stid(&stp->st_stid);
	if (ret == nfserr_admin_revoked)
		nfsd40_drop_revoked_stid(stp->st_stid.sc_client,
					 &stp->st_stid.sc_stateid);
	if (ret != nfs_ok)
		mutex_unlock(&stp->st_mutex);
	return ret;
}

static struct nfs4_ol_stateid *
nfsd4_find_and_lock_existing_open(struct nfs4_file *fp,
				  struct nfsd4_open *open)
{
	struct nfs4_ol_stateid *stp;

	for (;;) {
		spin_lock(&fp->fi_lock);
		stp = nfsd4_find_existing_open(fp, open);
		spin_unlock(&fp->fi_lock);
		if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
			break;
		nfs4_put_stid(&stp->st_stid);
	}
	return stp;
}
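/*
 * Look up the openowner under ->cl_lock; if none is found, allocate
 * one outside the lock and retry the lookup, so that a racing insert
 * of the same owner is detected rather than duplicated.
 */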
static struct nfs4_openowner *
find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
			      struct nfsd4_compound_state *cstate)
{
	struct nfs4_client *clp = cstate->clp;
	struct nfs4_openowner *oo, *new = NULL;

retry:
	spin_lock(&clp->cl_lock);
	oo = find_openstateowner_str(strhashval, open, clp);
	if (!oo && new) {
		hash_openowner(new, clp, strhashval);
		spin_unlock(&clp->cl_lock);
		return new;
	}
	spin_unlock(&clp->cl_lock);

	if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) {
		/* Replace unconfirmed owners without checking for replay. */
		release_openowner(oo);
		oo = NULL;
	}
	if (oo) {
		if (new)
			nfs4_free_stateowner(&new->oo_owner);
		return oo;
	}

	new = alloc_stateowner(openowner_slab, &open->op_owner, clp);
	if (!new)
		return NULL;
	new->oo_owner.so_ops = &openowner_ops;
	new->oo_owner.so_is_open_owner = 1;
	new->oo_owner.so_seqid = open->op_seqid;
	new->oo_flags = 0;
	if (nfsd4_has_session(cstate))
		new->oo_flags |= NFS4_OO_CONFIRMED;
	new->oo_time = 0;
	new->oo_last_closed_stid = NULL;
	INIT_LIST_HEAD(&new->oo_close_lru);
	goto retry;
}

static struct nfs4_ol_stateid *
init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
{
	struct nfs4_openowner *oo = open->op_openowner;
	struct nfs4_ol_stateid *retstp = NULL;
	struct nfs4_ol_stateid *stp;

	stp = open->op_stp;
	/* We are moving these outside of the spinlocks to avoid the warnings */
	mutex_init(&stp->st_mutex);
	mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);

retry:
	spin_lock(&oo->oo_owner.so_client->cl_lock);
	spin_lock(&fp->fi_lock);
	if (nfs4_openowner_unhashed(oo)) {
		mutex_unlock(&stp->st_mutex);
		stp = NULL;
		goto out_unlock;
	}

	retstp = nfsd4_find_existing_open(fp, open);
	if (retstp)
		goto out_unlock;

	open->op_stp = NULL;
	refcount_inc(&stp->st_stid.sc_count);
	stp->st_stid.sc_type = SC_TYPE_OPEN;
	INIT_LIST_HEAD(&stp->st_locks);
	stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
	get_nfs4_file(fp);
	stp->st_stid.sc_file = fp;
	stp->st_access_bmap = 0;
	stp->st_deny_bmap = 0;
	stp->st_openstp = NULL;
	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
	list_add(&stp->st_perfile, &fp->fi_stateids);

out_unlock:
	spin_unlock(&fp->fi_lock);
	spin_unlock(&oo->oo_owner.so_client->cl_lock);
	if (retstp) {
		/* Handle races with CLOSE */
		if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
			nfs4_put_stid(&retstp->st_stid);
			goto retry;
		}
		/* To keep mutex tracking happy */
		mutex_unlock(&stp->st_mutex);
		stp = retstp;
	}
	return stp;
}

/*
 * In the 4.0 case we need to keep the owners around a little while to handle
 * CLOSE replay. We still do need to release any file access that is held by
 * them before returning however.
 */
static void
move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
{
	struct nfs4_ol_stateid *last;
	struct nfs4_openowner *oo = openowner(s->st_stateowner);
	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
					  nfsd_net_id);

	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);

	/*
	 * We know that we hold one reference via nfsd4_close, and another
	 * "persistent" reference for the client. If the refcount is higher
	 * than 2, then there are still calls in progress that are using this
	 * stateid. We can't put the sc_file reference until they are finished.
	 * Wait for the refcount to drop to 2. Since it has been unhashed,
	 * there should be no danger of the refcount going back up again at
	 * this point.
	 * Some threads with a reference might be waiting for rp_locked,
	 * so tell them to stop waiting.
	 */
	store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);

	release_all_access(s);
	if (s->st_stid.sc_file) {
		put_nfs4_file(s->st_stid.sc_file);
		s->st_stid.sc_file = NULL;
	}

	spin_lock(&nn->client_lock);
	last = oo->oo_last_closed_stid;
	oo->oo_last_closed_stid = s;
	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
	oo->oo_time = ktime_get_boottime_seconds();
	spin_unlock(&nn->client_lock);
	if (last)
		nfs4_put_stid(&last->st_stid);
}
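/*
 * Look up an nfs4_file by filehandle. The rhltable is keyed on the
 * inode, so entries with the same inode but different filehandles
 * share a bucket list; only an exact filehandle match is returned,
 * and only if a reference can still be taken.
 */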
static noinline_for_stack struct nfs4_file *
nfsd4_file_hash_lookup(const struct svc_fh *fhp)
{
	struct inode *inode = d_inode(fhp->fh_dentry);
	struct rhlist_head *tmp, *list;
	struct nfs4_file *fi;

	rcu_read_lock();
	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
			       nfs4_file_rhash_params);
	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
			if (refcount_inc_not_zero(&fi->fi_ref)) {
				rcu_read_unlock();
				return fi;
			}
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * On hash insertion, identify entries with the same inode but
 * distinct filehandles. They will all be on the list returned
 * by rhltable_lookup().
 *
 * inode->i_lock prevents racing insertions from adding an entry
 * for the same inode/fhp pair twice.
 */
static noinline_for_stack struct nfs4_file *
nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
{
	struct inode *inode = d_inode(fhp->fh_dentry);
	struct rhlist_head *tmp, *list;
	struct nfs4_file *ret = NULL;
	bool alias_found = false;
	struct nfs4_file *fi;
	int err;

	rcu_read_lock();
	spin_lock(&inode->i_lock);

	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
			       nfs4_file_rhash_params);
	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
			if (refcount_inc_not_zero(&fi->fi_ref))
				ret = fi;
		} else
			fi->fi_aliased = alias_found = true;
	}
	if (ret)
		goto out_unlock;

	nfsd4_file_init(fhp, new);
	err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
			      nfs4_file_rhash_params);
	if (err)
		goto out_unlock;

	new->fi_aliased = alias_found;
	ret = new;

out_unlock:
	spin_unlock(&inode->i_lock);
	rcu_read_unlock();
	return ret;
}

static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
{
	rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
			nfs4_file_rhash_params);
}

/*
 * Called to check deny when READ with all zero stateid or
 * WRITE with all zero or all one stateid
 */
static __be32
nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
{
	struct nfs4_file *fp;
	__be32 ret = nfs_ok;

	fp = nfsd4_file_hash_lookup(current_fh);
	if (!fp)
		return ret;
	/* Check for conflicting share reservations */
	spin_lock(&fp->fi_lock);
	if (fp->fi_share_deny & deny_type)
		ret = nfserr_locked;
	spin_unlock(&fp->fi_lock);
	put_nfs4_file(fp);
	return ret;
}

static bool nfsd4_deleg_present(const struct inode *inode)
{
	struct file_lock_context *ctx = locks_inode_context(inode);

	return ctx && !list_empty_careful(&ctx->flc_lease);
}

/**
 * nfsd_wait_for_delegreturn - wait for delegations to be returned
 * @rqstp: the RPC transaction being executed
 * @inode: in-core inode of the file being waited for
 *
 * The timeout prevents deadlock if all nfsd threads happen to be
 * tied up waiting for returning delegations.
 *
 * Return values:
 *   %true: delegation was returned
 *   %false: timed out waiting for delegreturn
 */
bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
{
	long __maybe_unused timeo;

	timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
				       NFSD_DELEGRETURN_TIMEOUT);
	trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
	return timeo > 0;
}

static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{
	struct nfs4_delegation *dp = cb_to_delegation(cb);
	struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
					  nfsd_net_id);

	block_delegations(&dp->dl_stid.sc_file->fi_fhandle);

	/*
	 * We can't do this in nfsd_break_deleg_cb because it is
	 * already holding inode->i_lock.
	 *
	 * If the dl_time != 0, then we know that it has already been
	 * queued for a lease break. Don't queue it again.
	 */
	spin_lock(&state_lock);
	if (delegation_hashed(dp) && dp->dl_time == 0) {
		dp->dl_time = ktime_get_boottime_seconds();
		list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
	}
	spin_unlock(&state_lock);
}

static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
		struct rpc_task *task)
{
	struct nfs4_delegation *dp = cb_to_delegation(cb);

	trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);

	if (dp->dl_stid.sc_status)
		/* CLOSED or REVOKED */
		return 1;

	switch (task->tk_status) {
	case 0:
		return 1;
	case -NFS4ERR_DELAY:
		rpc_delay(task, 2 * HZ);
		return 0;
	case -EBADHANDLE:
	case -NFS4ERR_BAD_STATEID:
		/*
		 * Race: client probably got cb_recall before open reply
		 * granting delegation.
		 */
		if (dp->dl_retries--) {
			rpc_delay(task, 2 * HZ);
			return 0;
		}
		fallthrough;
	default:
		return 1;
	}
}

static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
{
	struct nfs4_delegation *dp = cb_to_delegation(cb);

	nfs4_put_stid(&dp->dl_stid);
}

static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
	.prepare	= nfsd4_cb_recall_prepare,
	.done		= nfsd4_cb_recall_done,
	.release	= nfsd4_cb_recall_release,
	.opcode		= OP_CB_RECALL,
};
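/*
 * Queue a CB_RECALL for one delegation. NFSD4_CALLBACK_RUNNING
 * guards against queueing the callback twice, and the stateid
 * reference taken here is dropped by nfsd4_cb_recall_release().
 */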
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
	bool queued;

	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
		return;

	/*
	 * We're assuming the state code never drops its reference
	 * without first removing the lease. Since we're in this lease
	 * callback (and since the lease code is serialized by the
	 * flc_lock) we know the server hasn't removed the lease yet, and
	 * we know it's safe to take a reference.
	 */
	refcount_inc(&dp->dl_stid.sc_count);
	queued = nfsd4_run_cb(&dp->dl_recall);
	WARN_ON_ONCE(!queued);
	if (!queued)
		refcount_dec(&dp->dl_stid.sc_count);
}

/* Called from break_lease() with flc_lock held. */
static bool
nfsd_break_deleg_cb(struct file_lease *fl)
{
	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->c.flc_owner;
	struct nfs4_file *fp = dp->dl_stid.sc_file;
	struct nfs4_client *clp = dp->dl_stid.sc_client;
	struct nfsd_net *nn;

	trace_nfsd_cb_recall(&dp->dl_stid);

	dp->dl_recalled = true;
	atomic_inc(&clp->cl_delegs_in_recall);
	if (try_to_expire_client(clp)) {
		nn = net_generic(clp->net, nfsd_net_id);
		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
	}

	/*
	 * We don't want the locks code to timeout the lease for us;
	 * we'll remove it ourself if a delegation isn't returned
	 * in time:
	 */
	fl->fl_break_time = 0;

	fp->fi_had_conflict = true;
	nfsd_break_one_deleg(dp);
	return false;
}

/**
 * nfsd_breaker_owns_lease - Check if lease conflict was resolved
 * @fl: Lock state to check
 *
 * Return values:
 *   %true: Lease conflict was resolved
 *   %false: Lease conflict was not resolved.
 */
static bool nfsd_breaker_owns_lease(struct file_lease *fl)
{
	struct nfs4_delegation *dl = fl->c.flc_owner;
	struct svc_rqst *rqst;
	struct nfs4_client *clp;

	rqst = nfsd_current_rqst();
	if (!nfsd_v4client(rqst))
		return false;
	clp = *(rqst->rq_lease_breaker);
	return dl->dl_stid.sc_client == clp;
}

static int
nfsd_change_deleg_cb(struct file_lease *onlist, int arg,
		     struct list_head *dispose)
{
	struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->c.flc_owner;
	struct nfs4_client *clp = dp->dl_stid.sc_client;

	if (arg & F_UNLCK) {
		if (dp->dl_recalled)
			atomic_dec(&clp->cl_delegs_in_recall);
		return lease_modify(onlist, arg, dispose);
	} else
		return -EAGAIN;
}

/**
 * nfsd4_deleg_lm_open_conflict - see if the given file points to an inode that has
 *                                an existing open that would conflict with the
 *                                desired lease.
 * @filp: file to check
 * @arg: type of lease that we're trying to acquire
 *
 * The kernel will call into this operation to determine whether there
 * are conflicting opens that may prevent the deleg from being granted.
 * For nfsd, that check is done at a higher level, so this trivially
 * returns 0.
 */
static int nfsd4_deleg_lm_open_conflict(struct file *filp, int arg)
{
	return 0;
}

static const struct lease_manager_operations nfsd_lease_mng_ops = {
	.lm_breaker_owns_lease = nfsd_breaker_owns_lease,
	.lm_break = nfsd_break_deleg_cb,
	.lm_change = nfsd_change_deleg_cb,
	.lm_open_conflict = nfsd4_deleg_lm_open_conflict,
};
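/*
 * Enforce the NFSv4.0 seqid rules for a stateowner: a seqid exactly
 * one below the owner's current value is a replay of the previous
 * request, the current value is the next request in order, and
 * anything else is a bad seqid. Sessions make this check unnecessary.
 */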
static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate,
				struct nfs4_stateowner *so, u32 seqid)
{
	if (nfsd4_has_session(cstate))
		return nfs_ok;
	if (seqid == so->so_seqid - 1)
		return nfserr_replay_me;
	if (seqid == so->so_seqid)
		return nfs_ok;
	return nfserr_bad_seqid;
}

static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions,
					   struct nfsd_net *nn)
{
	struct nfs4_client *found;

	spin_lock(&nn->client_lock);
	found = find_confirmed_client(clid, sessions, nn);
	if (found)
		atomic_inc(&found->cl_rpc_users);
	spin_unlock(&nn->client_lock);
	return found;
}

static __be32 set_client(clientid_t *clid,
		struct nfsd4_compound_state *cstate,
		struct nfsd_net *nn)
{
	if (cstate->clp) {
		if (!same_clid(&cstate->clp->cl_clientid, clid))
			return nfserr_stale_clientid;
		return nfs_ok;
	}
	if (STALE_CLIENTID(clid, nn))
		return nfserr_stale_clientid;
	/*
	 * We're in the 4.0 case (otherwise the SEQUENCE op would have
	 * set cstate->clp), so session = false:
	 */
	cstate->clp = lookup_clientid(clid, false, nn);
	if (!cstate->clp)
		return nfserr_expired;
	return nfs_ok;
}

__be32
nfsd4_process_open1(struct nfsd4_compound_state *cstate,
		    struct nfsd4_open *open, struct nfsd_net *nn)
{
	clientid_t *clientid = &open->op_clientid;
	struct nfs4_client *clp = NULL;
	unsigned int strhashval;
	struct nfs4_openowner *oo = NULL;
	__be32 status;

	/*
	 * In case we need it later, after we've already created the
	 * file and don't want to risk a further failure:
	 */
	open->op_file = nfsd4_alloc_file();
	if (open->op_file == NULL)
		return nfserr_jukebox;

	status = set_client(clientid, cstate, nn);
	if (status)
		return status;
	clp = cstate->clp;

	strhashval = ownerstr_hashval(&open->op_owner);
retry:
	oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
	open->op_openowner = oo;
	if (!oo)
		return nfserr_jukebox;
	if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
		nfs4_put_stateowner(&oo->oo_owner);
		goto retry;
	}
	status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
	if (status)
		return status;

	open->op_stp = nfs4_alloc_open_stateid(clp);
	if (!open->op_stp)
		return nfserr_jukebox;

	if (nfsd4_has_session(cstate) &&
	    (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
		open->op_odstate = alloc_clnt_odstate(clp);
		if (!open->op_odstate)
			return nfserr_jukebox;
	}

	return nfs_ok;
}

static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
	if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
		return nfserr_openmode;
	else
		return nfs_ok;
}

static int share_access_to_flags(u32 share_access)
{
	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
}

static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl,
						  stateid_t *s)
{
	struct nfs4_stid *ret;

	ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED);
	if (!ret)
		return NULL;
	return delegstateid(ret);
}

static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
{
	return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
	       open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
}

static __be32
nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
		 struct nfs4_delegation **dp)
{
	int flags;
	__be32 status = nfserr_bad_stateid;
	struct nfs4_delegation *deleg;

	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
	if (deleg == NULL)
		goto out;
	if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) {
		nfs4_put_stid(&deleg->dl_stid);
		status = nfserr_admin_revoked;
		goto out;
	}
	if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) {
		nfs4_put_stid(&deleg->dl_stid);
		nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid);
		status = nfserr_deleg_revoked;
		goto out;
	}
	flags = share_access_to_flags(open->op_share_access);
	status = nfs4_check_delegmode(deleg, flags);
	if (status) {
		nfs4_put_stid(&deleg->dl_stid);
		goto out;
	}
	*dp = deleg;
out:
	if (!nfsd4_is_deleg_cur(open))
		return nfs_ok;
	if (status)
		return status;
	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
	return nfs_ok;
}

static inline int nfs4_access_to_access(u32 nfs4_access)
{
	int flags = 0;

	if (nfs4_access & NFS4_SHARE_ACCESS_READ)
		flags |= NFSD_MAY_READ;
	if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
		flags |= NFSD_MAY_WRITE;
	return flags;
}

static inline __be32
nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
		struct nfsd4_open *open)
{
	struct iattr iattr = {
		.ia_valid = ATTR_SIZE,
		.ia_size = 0,
	};
	struct nfsd_attrs attrs = {
		.na_iattr	= &iattr,
	};
	if (!open->op_truncate)
		return 0;
	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
		return nfserr_inval;
	return nfsd_setattr(rqstp, fh, &attrs, NULL);
}
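/*
 * Grant the access and deny modes requested by an OPEN: check for
 * conflicts under ->fi_lock, record the new bits in the stateid,
 * open a struct nfsd_file if this access mode has no cached open
 * yet, and roll everything back if a later step fails.
 */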
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
		struct nfsd4_open *open, bool new_stp)
{
	struct nfsd_file *nf = NULL;
	__be32 status;
	int oflag = nfs4_access_to_omode(open->op_share_access);
	int access = nfs4_access_to_access(open->op_share_access);
	unsigned char old_access_bmap, old_deny_bmap;

	spin_lock(&fp->fi_lock);

	/*
	 * Are we trying to set a deny mode that would conflict with
	 * current access?
	 */
	status = nfs4_file_check_deny(fp, open->op_share_deny);
	if (status != nfs_ok) {
		if (status != nfserr_share_denied) {
			spin_unlock(&fp->fi_lock);
			goto out;
		}
		if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
				stp, open->op_share_deny, false))
			status = nfserr_jukebox;
		spin_unlock(&fp->fi_lock);
		goto out;
	}

	/* set access to the file */
	status = nfs4_file_get_access(fp, open->op_share_access);
	if (status != nfs_ok) {
		if (status != nfserr_share_denied) {
			spin_unlock(&fp->fi_lock);
			goto out;
		}
		if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
				stp, open->op_share_access, true))
			status = nfserr_jukebox;
		spin_unlock(&fp->fi_lock);
		goto out;
	}

	/* Set access bits in stateid */
	old_access_bmap = stp->st_access_bmap;
	set_access(open->op_share_access, stp);

	/* Set new deny mask */
	old_deny_bmap = stp->st_deny_bmap;
	set_deny(open->op_share_deny, stp);
	fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);

	if (!fp->fi_fds[oflag]) {
		spin_unlock(&fp->fi_lock);

		status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
						  open->op_filp, &nf);
		if (status != nfs_ok)
			goto out_put_access;

		spin_lock(&fp->fi_lock);
		if (!fp->fi_fds[oflag]) {
			fp->fi_fds[oflag] = nf;
			nf = NULL;
		}
	}
	spin_unlock(&fp->fi_lock);
	if (nf)
		nfsd_file_put(nf);

	status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
						access));
	if (status)
		goto out_put_access;

	status = nfsd4_truncate(rqstp, cur_fh, open);
	if (status)
		goto out_put_access;
out:
	return status;
out_put_access:
	stp->st_access_bmap = old_access_bmap;
	nfs4_file_put_access(fp, open->op_share_access);
	reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
	goto out;
}

static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
		struct nfsd4_open *open)
{
	__be32 status;
	unsigned char old_deny_bmap = stp->st_deny_bmap;

	if (!test_access(open->op_share_access, stp))
		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);

	/* test and set deny mode */
	spin_lock(&fp->fi_lock);
	status = nfs4_file_check_deny(fp, open->op_share_deny);
	switch (status) {
	case nfs_ok:
		set_deny(open->op_share_deny, stp);
		fp->fi_share_deny |=
			(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
		break;
	case nfserr_share_denied:
		if (nfs4_resolve_deny_conflicts_locked(fp, false,
				stp, open->op_share_deny, false))
			status = nfserr_jukebox;
		break;
	}
	spin_unlock(&fp->fi_lock);

	if (status != nfs_ok)
		return status;

	status = nfsd4_truncate(rqstp, cur_fh, open);
	if (status != nfs_ok)
		reset_union_bmap_deny(old_deny_bmap, stp);
	return status;
}

/* Should we give out recallable state?: */
static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
{
	if (clp->cl_cb_state == NFSD4_CB_UP)
		return true;
	/*
	 * In the sessions case, since we don't have to establish a
	 * separate connection for callbacks, we assume it's OK
	 * until we hear otherwise:
	 */
	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}

static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
{
	struct file_lease *fl;

	fl = locks_alloc_lease();
	if (!fl)
		return NULL;
	fl->fl_lmops = &nfsd_lease_mng_ops;
	fl->c.flc_flags = FL_DELEG;
	fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
	fl->c.flc_owner = (fl_owner_t)dp;
	fl->c.flc_pid = current->tgid;
	fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
	return fl;
}
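/*
 * Before granting a delegation, look for writers that would conflict
 * with it: first by comparing the inode's i_writecount against the
 * opens nfsd itself holds, then by scanning other clients' open
 * stateids for write access.
 */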
static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
					 struct nfs4_file *fp)
{
	struct nfs4_ol_stateid *st;
	struct file *f = fp->fi_deleg_file->nf_file;
	struct inode *ino = file_inode(f);
	int writes;

	writes = atomic_read(&ino->i_writecount);
	if (!writes)
		return 0;
	/*
	 * There could be multiple filehandles (hence multiple
	 * nfs4_files) referencing this file, but that's not too
	 * common; let's just give up in that case rather than
	 * trying to go look up all the clients using that other
	 * nfs4_file as well:
	 */
	if (fp->fi_aliased)
		return -EAGAIN;
	/*
	 * If there's a close in progress, make sure that we see it
	 * clear any fi_fds[] entries before we see it decrement
	 * i_writecount:
	 */
	smp_mb__after_atomic();

	if (fp->fi_fds[O_WRONLY])
		writes--;
	if (fp->fi_fds[O_RDWR])
		writes--;
	if (writes > 0)
		return -EAGAIN; /* There may be non-NFSv4 writers */
	/*
	 * It's possible there are non-NFSv4 write opens in progress,
	 * but if they haven't incremented i_writecount yet then they
	 * also haven't called break lease yet; so, they'll break this
	 * lease soon enough. So, all that's left to check for is NFSv4
	 * opens:
	 */
	spin_lock(&fp->fi_lock);
	list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
		if (st->st_openstp == NULL /* it's an open */ &&
		    access_permit_write(st) &&
		    st->st_stid.sc_client != clp) {
			spin_unlock(&fp->fi_lock);
			return -EAGAIN;
		}
	}
	spin_unlock(&fp->fi_lock);
	/*
	 * There's a small chance that we could be racing with another
	 * NFSv4 open. However, any open that hasn't added itself to
	 * the fi_stateids list also hasn't called break_lease yet; so,
	 * they'll break this lease soon enough.
	 */
	return 0;
}

/*
 * It's possible that between opening the dentry and setting the delegation,
 * the file has been renamed or unlinked. Redo the lookup to verify that this
 * hasn't happened.
 */
static int
nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
			  struct svc_fh *parent)
{
	struct svc_export *exp;
	struct dentry *child;
	__be32 err;

	err = nfsd_lookup_dentry(open->op_rqstp, parent,
				 open->op_fname, open->op_fnamelen,
				 &exp, &child);
	if (err)
		return -EAGAIN;

	exp_put(exp);
	dput(child);
	if (child != file_dentry(fp->fi_deleg_file->nf_file))
		return -EAGAIN;

	return 0;
}

/*
 * We avoid breaking delegations held by a client due to its own activity, but
 * clearing setuid/setgid bits on a write is an implicit activity and the client
 * may not notice and continue using the old mode. Avoid giving out a delegation
 * on setuid/setgid files when the client is requesting an open for write.
 */
static int
nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
{
	struct inode *inode = file_inode(nf->nf_file);

	if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) &&
	    (inode->i_mode & (S_ISUID|S_ISGID)))
		return -EAGAIN;
	return 0;
}

#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS
static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
{
	return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
}
#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
{
	return false;
}
#endif /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
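/*
 * Attempt to set up a delegation for this OPEN: choose a write or
 * read delegation based on the share access, take a reference on a
 * suitable nfsd_file, install a file lease, and then recheck for
 * conflicts that may have raced in before hashing the delegation.
 */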
static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
		    struct svc_fh *parent)
{
	bool deleg_ts = nfsd4_want_deleg_timestamps(open);
	struct nfs4_client *clp = stp->st_stid.sc_client;
	struct nfs4_file *fp = stp->st_stid.sc_file;
	struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
	struct nfs4_delegation *dp;
	struct nfsd_file *nf = NULL;
	struct file_lease *fl;
	int status = 0;
	u32 dl_type;

	/*
	 * The fi_had_conflict and nfs_get_existing_delegation checks
	 * here are just optimizations; we'll need to recheck them at
	 * the end:
	 */
	if (fp->fi_had_conflict)
		return ERR_PTR(-EAGAIN);

	/*
	 * Try for a write delegation first. RFC8881 section 10.4 says:
	 *
	 *  "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
	 *   on its own, all opens."
	 *
	 * Furthermore, section 9.1.2 says:
	 *
	 *  "In the case of READ, the server may perform the corresponding
	 *   check on the access mode, or it may choose to allow READ for
	 *   OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE
	 *   implementation may unavoidably do reads (e.g., due to buffer
	 *   cache constraints)."
	 *
	 * We choose to offer a write delegation for OPEN with the
	 * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients.
	 */
	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
		nf = find_writeable_file(fp);
		dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
				     OPEN_DELEGATE_WRITE;
	}

	/*
	 * If the file is being opened O_RDONLY or we couldn't get a O_RDWR
	 * file for some reason, then try for a read delegation instead.
	 */
	if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
		nf = find_readable_file(fp);
		dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
				     OPEN_DELEGATE_READ;
	}

	if (!nf)
		return ERR_PTR(-EAGAIN);

	/*
	 * File delegations and associated locks cannot be recovered if the
	 * export is from an NFS proxy server.
	 */
	if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
		nfsd_file_put(nf);
		return ERR_PTR(-EOPNOTSUPP);
	}

	spin_lock(&state_lock);
	spin_lock(&fp->fi_lock);
	if (nfs4_delegation_exists(clp, fp))
		status = -EAGAIN;
	else if (nfsd4_verify_setuid_write(open, nf))
		status = -EAGAIN;
	else if (!fp->fi_deleg_file) {
		fp->fi_deleg_file = nf;
		/* increment early to prevent fi_deleg_file from being
		 * cleared */
		fp->fi_delegees = 1;
		nf = NULL;
	} else
		fp->fi_delegees++;
	spin_unlock(&fp->fi_lock);
	spin_unlock(&state_lock);
	if (nf)
		nfsd_file_put(nf);
	if (status)
		return ERR_PTR(status);

	status = -ENOMEM;
	dp = alloc_init_deleg(clp, fp, odstate, dl_type);
	if (!dp)
		goto out_delegees;

	fl = nfs4_alloc_init_lease(dp);
	if (!fl)
		goto out_clnt_odstate;

	status = kernel_setlease(fp->fi_deleg_file->nf_file,
				 fl->c.flc_type, &fl, NULL);
	if (fl)
		locks_free_lease(fl);
	if (status)
		goto out_clnt_odstate;

	if (parent) {
		status = nfsd4_verify_deleg_dentry(open, fp, parent);
		if (status)
			goto out_unlock;
	}

	status = nfsd4_check_conflicting_opens(clp, fp);
	if (status)
		goto out_unlock;

	/*
	 * Now that the deleg is set, check again to ensure that nothing
	 * raced in and changed the mode while we weren't looking.
	 */
	status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
	if (status)
		goto out_unlock;

	status = -EAGAIN;
	if (fp->fi_had_conflict)
		goto out_unlock;

	spin_lock(&state_lock);
	spin_lock(&clp->cl_lock);
	spin_lock(&fp->fi_lock);
	status = hash_delegation_locked(dp, fp);
	spin_unlock(&fp->fi_lock);
	spin_unlock(&clp->cl_lock);
	spin_unlock(&state_lock);

	if (status)
		goto out_unlock;

	return dp;
out_unlock:
	kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL,
			(void **)&dp);
out_clnt_odstate:
	put_clnt_odstate(dp->dl_clnt_odstate);
	nfs4_put_stid(&dp->dl_stid);
out_delegees:
	put_deleg_file(fp);
	return ERR_PTR(status);
}

static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
	open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
	if (status == -EAGAIN)
		open->op_why_no_deleg = WND4_CONTENTION;
	else {
		open->op_why_no_deleg = WND4_RESOURCE;
		switch (open->op_deleg_want) {
		case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
		case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
		case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
			break;
		case OPEN4_SHARE_ACCESS_WANT_CANCEL:
			open->op_why_no_deleg = WND4_CANCELLED;
			break;
		case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
			WARN_ON_ONCE(1);
		}
	}
}
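/*
 * Issue a vfs_getattr() on a writeable file for the delegated
 * nfs4_file to collect the size, change attribute and timestamps
 * cached in the delegation; returns false if no writeable file is
 * available or the getattr fails.
 */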
static bool
nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
		     struct kstat *stat)
{
	struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file);
	struct path path;
	int rc;

	if (!nf)
		return false;

	path.mnt = currentfh->fh_export->ex_path.mnt;
	path.dentry = file_dentry(nf->nf_file);

	rc = vfs_getattr(&path, stat,
			 STATX_MODE | STATX_SIZE | STATX_ATIME |
			 STATX_MTIME | STATX_CTIME | STATX_CHANGE_COOKIE,
			 AT_STATX_SYNC_AS_STAT);

	nfsd_file_put(nf);
	return rc == 0;
}

/*
 * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN
 * with NFS4_SHARE_ACCESS_WRITE by allocating a separate nfsd_file and
 * struct file to be used for read with the delegation stateid.
 */
static bool
nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open,
			      struct svc_fh *fh, struct nfs4_ol_stateid *stp)
{
	struct nfs4_file *fp;
	struct nfsd_file *nf = NULL;

	if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
	    NFS4_SHARE_ACCESS_WRITE) {
		if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ,
					     NULL, &nf))
			return false;
		fp = stp->st_stid.sc_file;
		spin_lock(&fp->fi_lock);
		__nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ);
		if (!fp->fi_fds[O_RDONLY]) {
			fp->fi_fds[O_RDONLY] = nf;
			nf = NULL;
		}
		fp->fi_rdeleg_file = nfsd_file_get(fp->fi_fds[O_RDONLY]);
		spin_unlock(&fp->fi_lock);
		if (nf)
			nfsd_file_put(nf);
	}
	return true;
}

/*
 * The Linux NFS server does not offer write delegations to NFSv4.0
 * clients in order to avoid conflicts between write delegations and
 * GETATTRs requesting CHANGE or SIZE attributes.
 *
 * With NFSv4.1 and later minorversions, the SEQUENCE operation that
 * begins each COMPOUND contains a client ID. Delegation recall can
 * be avoided when the server recognizes that the client sending a
 * GETATTR also holds the write delegation it conflicts with.
 *
 * However, the NFSv4.0 protocol does not enable a server to
 * determine that a GETATTR originated from the client holding the
 * conflicting delegation versus coming from some other client. Per
 * RFC 7530 Section 16.7.5, the server must recall or send a
 * CB_GETATTR even when the GETATTR originates from the client that
 * holds the conflicting delegation.
 *
 * An NFSv4.0 client can trigger a pathological situation if it
 * always sends a DELEGRETURN preceded by a conflicting GETATTR in
 * the same COMPOUND. COMPOUND execution will always stop at the
 * GETATTR and the DELEGRETURN will never get executed. The server
 * eventually revokes the delegation, which can result in loss of
 * open or lock state.
 */
static void
nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
		     struct nfs4_ol_stateid *stp, struct svc_fh *currentfh,
		     struct svc_fh *fh)
{
	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
	bool deleg_ts = nfsd4_want_deleg_timestamps(open);
	struct nfs4_client *clp = stp->st_stid.sc_client;
	struct svc_fh *parent = NULL;
	struct nfs4_delegation *dp;
	struct kstat stat;
	int status = 0;
	int cb_up;

	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
	open->op_recall = false;
	switch (open->op_claim_type) {
	case NFS4_OPEN_CLAIM_PREVIOUS:
		if (!cb_up)
			open->op_recall = true;
		break;
	case NFS4_OPEN_CLAIM_NULL:
		parent = currentfh;
		fallthrough;
	case NFS4_OPEN_CLAIM_FH:
		/*
		 * Let's not give out any delegations till everyone's
		 * had the chance to reclaim theirs, *and* until
		 * NLM locks have all been reclaimed:
		 */
		if (locks_in_grace(clp->net))
			goto out_no_deleg;
		if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
			goto out_no_deleg;
		if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
		    !clp->cl_minorversion)
			goto out_no_deleg;
		break;
	default:
		goto out_no_deleg;
	}
	dp = nfs4_set_delegation(open, stp, parent);
	if (IS_ERR(dp))
		goto out_no_deleg;

	memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid,
	       sizeof(dp->dl_stid.sc_stateid));

	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
		struct file *f = dp->dl_stid.sc_file->fi_deleg_file->nf_file;

		if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) ||
		    !nfs4_delegation_stat(dp, currentfh, &stat)) {
			nfs4_put_stid(&dp->dl_stid);
			destroy_delegation(dp);
			goto out_no_deleg;
		}
		open->op_delegate_type = deleg_ts ?
			OPEN_DELEGATE_WRITE_ATTRS_DELEG :
			OPEN_DELEGATE_WRITE;
		dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
		dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
		dp->dl_atime = stat.atime;
		dp->dl_ctime = stat.ctime;
		dp->dl_mtime = stat.mtime;
		spin_lock(&f->f_lock);
		if (deleg_ts)
			f->f_mode |= FMODE_NOCMTIME;
		spin_unlock(&f->f_lock);
		trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
	} else {
		open->op_delegate_type = deleg_ts &&
			nfs4_delegation_stat(dp, currentfh, &stat) ?
			OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
		dp->dl_atime = stat.atime;
		trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
	}
	nfs4_put_stid(&dp->dl_stid);
	return;
out_no_deleg:
	open->op_delegate_type = OPEN_DELEGATE_NONE;
	/* 4.1 client asking for a delegation? */
	if (open->op_deleg_want)
		nfsd4_open_deleg_none_ext(open, status);
	return;
}

static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
					struct nfs4_delegation *dp)
{
	if (deleg_is_write(dp->dl_type)) {
		if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
			open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
		} else if (open->op_deleg_want &
			   OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
			open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
		}
	}
	/* Otherwise the client must be confused wanting a delegation
	 * it already has, therefore we don't return
	 * OPEN_DELEGATE_NONE_EXT and a reason.
	 */
}

/* Are we returning only a delegation stateid? */
static bool open_xor_delegation(struct nfsd4_open *open)
{
	if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
		return false;
	/* Did we actually get a delegation? */
	if (!deleg_is_read(open->op_delegate_type) &&
	    !deleg_is_write(open->op_delegate_type))
		return false;
	return true;
}

/**
 * nfsd4_process_open2 - finish open processing
 * @rqstp: the RPC transaction being executed
 * @current_fh: NFSv4 COMPOUND's current filehandle
 * @open: OPEN arguments
 *
 * If successful, (1) truncate the file if open->op_truncate was
 * set, (2) set open->op_stateid, (3) set open->op_delegation.
 *
 * Returns %nfs_ok on success; otherwise an nfs4stat value in
 * network byte order is returned.
 */
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh,
		    struct nfsd4_open *open)
{
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
	struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
	struct nfs4_file *fp = NULL;
	struct nfs4_ol_stateid *stp = NULL;
	struct nfs4_delegation *dp = NULL;
	__be32 status;
	bool new_stp = false;

	/*
	 * Lookup file; if found, lookup stateid and check open request,
	 * and check for delegations in the process of being recalled.
	 * If not found, create the nfs4_file struct
	 */
	fp = nfsd4_file_hash_insert(open->op_file, current_fh);
	if (unlikely(!fp))
		return nfserr_jukebox;
	if (fp != open->op_file) {
		status = nfs4_check_deleg(cl, open, &dp);
		if (status)
			goto out;
		if (dp && nfsd4_is_deleg_cur(open) &&
		    (dp->dl_stid.sc_file != fp)) {
			/*
			 * RFC8881 section 8.2.4 mandates the server to return
			 * NFS4ERR_BAD_STATEID if the selected table entry does
			 * not match the current filehandle. However returning
			 * NFS4ERR_BAD_STATEID in the OPEN can cause the client
			 * to repeatedly retry the operation with the same
			 * stateid, since the stateid itself is valid. To avoid
			 * this situation NFSD returns NFS4ERR_INVAL instead.
			 */
			status = nfserr_inval;
			goto out;
		}
		stp = nfsd4_find_and_lock_existing_open(fp, open);
	} else {
		open->op_file = NULL;
		status = nfserr_bad_stateid;
		if (nfsd4_is_deleg_cur(open))
			goto out;
	}

	if (!stp) {
		stp = init_open_stateid(fp, open);
		if (!stp) {
			status = nfserr_jukebox;
			goto out;
		}
		if (!open->op_stp)
			new_stp = true;
	}

	/*
	 * OPEN the file, or upgrade an existing OPEN.
	 * If truncate fails, the OPEN fails.
	 *
	 * stp is already locked.
	 */
	if (!new_stp) {
		/* Stateid was found, this is an OPEN upgrade */
		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
		if (status) {
			mutex_unlock(&stp->st_mutex);
			goto out;
		}
	} else {
		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp,
					   open, true);
		if (status) {
			release_open_stateid(stp);
			mutex_unlock(&stp->st_mutex);
			goto out;
		}

		stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
							open->op_odstate);
		if (stp->st_clnt_odstate == open->op_odstate)
			open->op_odstate = NULL;
	}

	nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
	mutex_unlock(&stp->st_mutex);

	if (nfsd4_has_session(&resp->cstate)) {
		if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
			open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
			open->op_why_no_deleg = WND4_NOT_WANTED;
			goto nodeleg;
		}
	}

	/*
	 * Attempt to hand out a delegation. No error return, because the
	 * OPEN succeeds even if we fail.
	 */
	nfs4_open_delegation(rqstp, open, stp,
			     &resp->cstate.current_fh, current_fh);

	/*
	 * If there is an existing open stateid, it must be updated and
	 * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
	 * open stateid would have to be created.
	 */
	if (new_stp && open_xor_delegation(open)) {
		memcpy(&open->op_stateid, &zero_stateid,
		       sizeof(open->op_stateid));
		open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
		release_open_stateid(stp);
	}
nodeleg:
	status = nfs_ok;
	trace_nfsd_open(&stp->st_stid.sc_stateid);
out:
	/* 4.1 client trying to upgrade/downgrade delegation? */
	if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
	    open->op_deleg_want)
		nfsd4_deleg_xgrade_none_ext(open, dp);

	if (fp)
		put_nfs4_file(fp);
	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
		open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
	/*
	 * To finish the open response, we just need to set the rflags.
	 */
	open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
	if (nfsd4_has_session(&resp->cstate))
		open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
	else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;

	if (dp)
		nfs4_put_stid(&dp->dl_stid);
	if (stp)
		nfs4_put_stid(&stp->st_stid);

	return status;
}

void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
			      struct nfsd4_open *open)
{
	if (open->op_openowner)
		nfs4_put_stateowner(&open->op_openowner->oo_owner);
	if (open->op_file)
		kmem_cache_free(file_slab, open->op_file);
	if (open->op_stp)
		nfs4_put_stid(&open->op_stp->st_stid);
	if (open->op_odstate)
		kmem_cache_free(odstate_slab, open->op_odstate);
}

__be32
nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    union nfsd4_op_u *u)
{
	clientid_t *clid = &u->renew;
	struct nfs4_client *clp;
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	trace_nfsd_clid_renew(clid);
	status = set_client(clid, cstate, nn);
	if (status)
		return status;
	clp = cstate->clp;
	if (!list_empty(&clp->cl_delegations) &&
	    clp->cl_cb_state != NFSD4_CB_UP)
		return nfserr_cb_path_down;
	return nfs_ok;
}

static void
nfsd4_end_grace(struct nfsd_net *nn)
{
	/* do nothing if grace period already ended */
	if (nn->grace_ended)
		return;

	trace_nfsd_grace_complete(nn);
	nn->grace_ended = true;
	/*
	 * If the server goes down again right now, an NFSv4
	 * client will still be allowed to reclaim after it comes back up,
	 * even if it hasn't yet had a chance to reclaim state this time.
	 */
	nfsd4_record_grace_done(nn);
	/*
	 * At this point, NFSv4 clients can still reclaim. But if the
	 * server crashes, any that have not yet reclaimed will be out
	 * of luck on the next boot.
	 *
	 * (NFSv4.1+ clients are considered to have reclaimed once they
	 * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to
	 * have reclaimed after their first OPEN.)
	 */
	locks_end_grace(&nn->nfsd4_manager);
	/*
	 * At this point, and once lockd and/or any other containers
	 * exit their grace period, further reclaims will fail and
	 * regular locking can resume.
	 */
}

/**
 * nfsd4_force_end_grace - forcibly end the NFSv4 grace period
 * @nn: network namespace for the server instance to be updated
 *
 * Forces bypass of normal grace period completion, then schedules
 * the laundromat to end the grace period immediately. Does not wait
 * for the grace period to fully terminate before returning.
 *
 * Return values:
 *   %true: Grace termination scheduled
 *   %false: No action was taken
 */
bool nfsd4_force_end_grace(struct nfsd_net *nn)
{
	if (!nn->client_tracking_ops)
		return false;
	if (READ_ONCE(nn->grace_ended))
		return false;
	/* laundromat_work must be initialised now, though it might be disabled */
	WRITE_ONCE(nn->grace_end_forced, true);
	/* mod_delayed_work() doesn't queue work after
	 * nfs4_state_shutdown_net() has called disable_delayed_work_sync()
	 */
	mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
	return true;
}

/*
 * If we've waited a lease period but there are still clients trying to
 * reclaim, wait a little longer to give them a chance to finish.
 */
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
	time64_t double_grace_period_end = nn->boot_time +
					   2 * nn->nfsd4_lease;

	if (READ_ONCE(nn->grace_end_forced))
		return false;
	if (nn->track_reclaim_completes &&
			atomic_read(&nn->nr_reclaim_complete) ==
			nn->reclaim_str_hashtbl_size)
		return false;
	if (!nn->somebody_reclaimed)
		return false;
	nn->somebody_reclaimed = false;
	/*
	 * If we've given them *two* lease times to reclaim, and they're
	 * still not done, give up:
	 */
	if (ktime_get_boottime_seconds() > double_grace_period_end)
		return false;
	return true;
}

struct laundry_time {
	time64_t cutoff;
	time64_t new_timeo;
};

static bool state_expired(struct laundry_time *lt, time64_t last_refresh)
{
	time64_t time_remaining;

	if (last_refresh < lt->cutoff)
		return true;
	time_remaining = last_refresh - lt->cutoff;
	lt->new_timeo = min(lt->new_timeo, time_remaining);
	return false;
}

#ifdef CONFIG_NFSD_V4_2_INTER_SSC
void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
{
	spin_lock_init(&nn->nfsd_ssc_lock);
	INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
	init_waitqueue_head(&nn->nfsd_ssc_waitq);
}

/*
 * This is called when nfsd is being shutdown, after all inter_ssc
 * cleanup was done, to destroy the ssc delayed unmount list.
 */
static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
{
	struct nfsd4_ssc_umount_item *ni = NULL;
	struct nfsd4_ssc_umount_item *tmp;

	spin_lock(&nn->nfsd_ssc_lock);
	list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
		list_del(&ni->nsui_list);
		spin_unlock(&nn->nfsd_ssc_lock);
		mntput(ni->nsui_vfsmount);
		kfree(ni);
		spin_lock(&nn->nfsd_ssc_lock);
	}
	spin_unlock(&nn->nfsd_ssc_lock);
}

static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
{
	bool do_wakeup = false;
	struct nfsd4_ssc_umount_item *ni = NULL;
	struct nfsd4_ssc_umount_item *tmp;

	spin_lock(&nn->nfsd_ssc_lock);
	list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
		if (time_after(jiffies, ni->nsui_expire)) {
			if (refcount_read(&ni->nsui_refcnt) > 1)
				continue;

			/* mark being unmount */
			ni->nsui_busy = true;
			spin_unlock(&nn->nfsd_ssc_lock);
			mntput(ni->nsui_vfsmount);
			spin_lock(&nn->nfsd_ssc_lock);

			/* waiters need to start from begin of list */
			list_del(&ni->nsui_list);
			kfree(ni);

			/* wakeup ssc_connect waiters */
			do_wakeup = true;
			continue;
		}
		break;
	}
	if (do_wakeup)
		wake_up_all(&nn->nfsd_ssc_waitq);
	spin_unlock(&nn->nfsd_ssc_lock);
}
#endif

/* Check if any lock belonging to this lockowner has any blockers */
static bool
nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
{
	struct file_lock_context *ctx;
	struct nfs4_ol_stateid *stp;
	struct nfs4_file *nf;

	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
		nf = stp->st_stid.sc_file;
		ctx = locks_inode_context(nf->fi_inode);
		if (!ctx)
			continue;
		if (locks_owner_has_blockers(ctx, lo))
			return true;
	}
	return false;
}

static bool
nfs4_anylock_blockers(struct nfs4_client *clp)
{
	int i;
	struct nfs4_stateowner *so;
	struct nfs4_lockowner *lo;

	if (atomic_read(&clp->cl_delegs_in_recall))
		return true;
	spin_lock(&clp->cl_lock);
	for (i = 0; i < OWNER_HASH_SIZE; i++) {
		list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
				so_strhash) {
			if (so->so_is_open_owner)
				continue;
			lo = lockowner(so);
			if (nfs4_lockowner_has_blockers(lo)) {
				spin_unlock(&clp->cl_lock);
				return true;
			}
		}
	}
	spin_unlock(&clp->cl_lock);
	return false;
}

static void
nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
				struct laundry_time *lt)
{
	unsigned int maxreap, reapcnt = 0;
	struct list_head *pos, *next;
	struct nfs4_client *clp;

	maxreap =
(atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state == NFSD4_EXPIRABLE) goto exp_client; if (!state_expired(lt, clp->cl_time)) break; if (!atomic_read(&clp->cl_rpc_users)) { if (clp->cl_state == NFSD4_ACTIVE) atomic_inc(&nn->nfsd_courtesy_clients); clp->cl_state = NFSD4_COURTESY; } if (!client_has_state(clp)) goto exp_client; if (!nfs4_anylock_blockers(clp)) if (reapcnt >= maxreap) continue; exp_client: if (!mark_client_expired_locked(clp)) { list_add(&clp->cl_lru, reaplist); reapcnt++; } } spin_unlock(&nn->client_lock); } static void nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist) { unsigned int maxreap = 0, reapcnt = 0; struct list_head *pos, *next; struct nfs4_client *clp; maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state == NFSD4_ACTIVE) break; if (reapcnt >= maxreap) break; if (!mark_client_expired_locked(clp)) { list_add(&clp->cl_lru, reaplist); reapcnt++; } } spin_unlock(&nn->client_lock); } static void nfs4_process_client_reaplist(struct list_head *reaplist) { struct list_head *pos, *next; struct nfs4_client *clp; list_for_each_safe(pos, next, reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); trace_nfsd_clid_purged(&clp->cl_clientid); list_del_init(&clp->cl_lru); expire_client(clp); } } static void nfs40_clean_admin_revoked(struct nfsd_net *nn, struct laundry_time *lt) { struct nfs4_client *clp; spin_lock(&nn->client_lock); if (nn->nfs40_last_revoke == 0 || nn->nfs40_last_revoke > lt->cutoff) { spin_unlock(&nn->client_lock); return; } nn->nfs40_last_revoke = 0; retry: list_for_each_entry(clp, &nn->client_lru, cl_lru) { unsigned long id, tmp; struct nfs4_stid *stid; if (atomic_read(&clp->cl_admin_revoked) == 0) continue; spin_lock(&clp->cl_lock); idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { refcount_inc(&stid->sc_count); spin_unlock(&nn->client_lock); /* this function drops ->cl_lock */ nfsd4_drop_revoked_stid(stid); nfs4_put_stid(stid); spin_lock(&nn->client_lock); goto retry; } spin_unlock(&clp->cl_lock); } spin_unlock(&nn->client_lock); } static time64_t nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; struct laundry_time lt = { .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, .new_timeo = nn->nfsd4_lease }; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; if (clients_still_reclaiming(nn)) { lt.new_timeo = 0; goto out; } nfsd4_end_grace(nn); spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && state_expired(&lt, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, &lt); nfs4_process_client_reaplist(&reaplist); nfs40_clean_admin_revoked(nn, &lt); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if 
(!state_expired(&lt, dp->dl_time)) break; refcount_inc(&dp->dl_stid.sc_count); unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_first_entry(&reaplist, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } spin_lock(&nn->client_lock); while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); if (!state_expired(&lt, oo->oo_time)) break; list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; spin_unlock(&nn->client_lock); nfs4_put_stid(&stp->st_stid); spin_lock(&nn->client_lock); } spin_unlock(&nn->client_lock); /* * It's possible for a client to try and acquire an already held lock * that is being held for a long time, and then lose interest in it. * So, we clean out any un-revisited request after a lease period * under the assumption that the client is no longer interested. * * RFC5661, sec. 9.6 states that the client must not rely on getting * notifications and must continue to poll for locks, even when the * server supports them. Thus this shouldn't lead to clients blocking * indefinitely once the lock does become free. */ BUG_ON(!list_empty(&reaplist)); spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); if (!state_expired(&lt, nbl->nbl_time)) break; list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru); list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } #ifdef CONFIG_NFSD_V4_2_INTER_SSC /* service the server-to-server copy delayed unmount list */ nfsd4_ssc_expire_umount(nn); #endif if (atomic_long_read(&num_delegations) >= max_delegations) deleg_reaper(nn); out: return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } static void laundromat_main(struct work_struct *); static void laundromat_main(struct work_struct *laundry) { time64_t t; struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); t = nfs4_laundromat(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } static void courtesy_client_reaper(struct nfsd_net *nn) { struct list_head reaplist; nfs4_get_courtesy_client_reaplist(nn, &reaplist); nfs4_process_client_reaplist(&reaplist); } static void deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (clp->cl_state != NFSD4_ACTIVE) continue; if (list_empty(&clp->cl_delegations)) continue; if (atomic_read(&clp->cl_delegs_in_recall)) continue; if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) continue; if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) continue; if (clp->cl_cb_state != NFSD4_CB_UP) continue; /* release in nfsd4_cb_recall_any_release */ kref_get(&clp->cl_nfsdfs.cl_ref); clp->cl_ra_time = ktime_get_boottime_seconds(); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } spin_unlock(&nn->client_lock); } static void 
nfsd4_state_shrinker_worker(struct work_struct *work) { struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); deleg_reaper(nn); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } static __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) { __be32 status = nfserr_openmode; /* For lock stateid's, we test the parent open, not the lock: */ if (stp->st_openstp) stp = stp->st_openstp; if ((flags & WR_STATE) && !access_permit_write(stp)) goto out; if ((flags & RD_STATE) && !access_permit_read(stp)) goto out; status = nfs_ok; out: return status; } static inline __be32 check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags) { if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; else if (opens_in_grace(net)) { /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; } else if (flags & WR_STATE) return nfs4_share_conflict(current_fh, NFS4_SHARE_DENY_WRITE); else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */ return nfs4_share_conflict(current_fh, NFS4_SHARE_DENY_READ); } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* * When sessions are used the stateid generation number is ignored * when it is zero. */ if (has_session && in->si_generation == 0) return nfs_ok; if (in->si_generation == ref->si_generation) return nfs_ok; /* If the client sends us a stateid from the future, it's buggy: */ if (nfsd4_stateid_generation_after(in, ref)) return nfserr_bad_stateid; /* * However, we could see a stateid from the past, even from a * non-buggy client. For example, if the client sends a lock * while some IO is outstanding, the lock may bump si_generation * while the IO is still in flight. 
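 *
 * (Concretely: if the client's copy of a stateid carries
 * si_generation 3 and a racing LOCK has bumped the server's copy to
 * 4, an in-flight READ still sending generation 3 gets
 * nfserr_old_stateid rather than nfserr_bad_stateid.)
 *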
The client could avoid that * situation by waiting for responses on all the IO requests, * but better performance may result in retrying IO that * receives an old_stateid error if requests are rarely * reordered in flight: */ return nfserr_old_stateid; } static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session) { __be32 ret; spin_lock(&s->sc_lock); ret = nfsd4_verify_open_stid(s); if (ret == nfs_ok) ret = check_stateid_generation(in, &s->sc_stateid, has_session); spin_unlock(&s->sc_lock); if (ret == nfserr_admin_revoked) nfsd40_drop_revoked_stid(s->sc_client, &s->sc_stateid); return ret; } static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) { if (ols->st_stateowner->so_is_open_owner && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) return nfserr_bad_stateid; return nfs_ok; } static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock; status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock; status = nfsd4_verify_open_stid(s); if (status) goto out_unlock; switch (s->sc_type) { case SC_TYPE_DELEG: status = nfs_ok; break; case SC_TYPE_OPEN: case SC_TYPE_LOCK: status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default: printk("unknown stateid type %x\n", s->sc_type); status = nfserr_bad_stateid; } out_unlock: spin_unlock(&cl->cl_lock); if (status == nfserr_admin_revoked) nfsd40_drop_revoked_stid(cl, stateid); return status; } __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned short typemask, unsigned short statusmask, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; struct nfs4_stid *stid; bool return_revoked = false; /* * only return revoked delegations if explicitly asked. * otherwise we report revoked or bad_stateid status. */ if (statusmask & SC_STATUS_REVOKED) return_revoked = true; if (typemask & SC_TYPE_DELEG) /* Always allow REVOKED for DELEG so we can * return the appropriate error. 
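 *
 * (Sketch of the filter being widened here, not the actual
 * find_stateid_by_type() body: a stateid is accepted roughly when
 *
 *	(sc_type & typemask) && !(sc_status & ~statusmask)
 *
 * i.e. statusmask lists the status bits a match is allowed to carry,
 * so adding SC_STATUS_REVOKED below admits revoked delegations.)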
*/ statusmask |= SC_STATUS_REVOKED; statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; status = set_client(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; return nfserr_stale_stateid; } if (status) return status; stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask); if (!stid) return nfserr_bad_stateid; if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) { nfs4_put_stid(stid); return nfserr_deleg_revoked; } if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) { nfsd40_drop_revoked_stid(cstate->clp, stateid); nfs4_put_stid(stid); return nfserr_admin_revoked; } *s = stid; return nfs_ok; } static struct nfsd_file * nfs4_find_file(struct nfs4_stid *s, int flags) { struct nfsd_file *ret = NULL; if (!s || s->sc_status) return NULL; switch (s->sc_type) { case SC_TYPE_DELEG: case SC_TYPE_OPEN: case SC_TYPE_LOCK: if (flags & RD_STATE) ret = find_readable_file(s->sc_file); else ret = find_writeable_file(s->sc_file); } return ret; } static __be32 nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags) { __be32 status; status = nfsd4_check_openowner_confirmed(ols); if (status) return status; return nfs4_check_openmode(ols, flags); } static __be32 nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, struct nfsd_file **nfp, int flags) { int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE; struct nfsd_file *nf; __be32 status; nf = nfs4_find_file(s, flags); if (nf) { status = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, fhp->fh_dentry, acc | NFSD_MAY_OWNER_OVERRIDE); if (status) { nfsd_file_put(nf); goto out; } } else { status = nfsd_file_acquire(rqstp, fhp, acc, &nf); if (status) return status; } *nfp = nf; out: return status; } static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, cps->cp_stateid.cs_stid.si_opaque.so_id); kfree(cps); } /* * A READ from an inter server to server COPY will have a * copy stateid. Look up the copy notify stateid from the * idr structure and take a reference on it. 
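 *
 * The lookup-then-pin pattern used below, in miniature (illustrative
 * pseudo-helpers, not nfsd code):
 *
 *	spin_lock(&tbl_lock);
 *	obj = idr_find(&tbl, id);		// may be NULL
 *	if (obj && obj->type == EXPECTED_TYPE)
 *		refcount_inc(&obj->refs);	// pin before unlocking
 *	spin_unlock(&tbl_lock);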
*/ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_client *clp, struct nfs4_cpntf_state **cps) { copy_stateid_t *cps_t; struct nfs4_cpntf_state *state = NULL; if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) return nfserr_bad_stateid; spin_lock(&nn->s2s_cp_lock); cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id); if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) refcount_inc(&state->cp_stateid.cs_count); else _free_cpntf_state_locked(nn, state); } unlock: spin_unlock(&nn->s2s_cp_lock); if (!state) return nfserr_bad_stateid; if (!clp) *cps = state; return 0; } static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, struct nfs4_stid **stid) { __be32 status; struct nfs4_cpntf_state *cps = NULL; struct nfs4_client *found; status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; cps->cpntf_time = ktime_get_boottime_seconds(); status = nfserr_expired; found = lookup_clientid(&cps->cp_p_clid, true, nn); if (!found) goto out; *stid = find_stateid_by_type(found, &cps->cp_p_stateid, SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, 0); if (*stid) status = nfs_ok; else status = nfserr_bad_stateid; put_client_renew(found); out: nfs4_put_cpntf_state(nn, cps); return status; } void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { spin_lock(&nn->s2s_cp_lock); _free_cpntf_state_locked(nn, cps); spin_unlock(&nn->s2s_cp_lock); } /** * nfs4_preprocess_stateid_op - find and prep stateid for an operation * @rqstp: incoming request from client * @cstate: current compound state * @fhp: filehandle associated with requested stateid * @stateid: stateid (provided by client) * @flags: flags describing type of operation to be done * @nfp: optional nfsd_file return pointer (may be NULL) * @cstid: optional returned nfs4_stid pointer (may be NULL) * * Given info from the client, look up a nfs4_stid for the operation. On * success, it returns a reference to the nfs4_stid and/or the nfsd_file * associated with it. 
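 *
 * A typical caller (a sketch modelled on the READ path; the
 * surrounding locals are assumed, not shown) does:
 *
 *	status = nfs4_preprocess_stateid_op(rqstp, cstate,
 *					    &cstate->current_fh,
 *					    &read->rd_stateid, RD_STATE,
 *					    &read->rd_nf, NULL);
 *	if (status)
 *		return status;
 *	// ... perform the read via read->rd_nf ...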
*/ __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, stateid_t *stateid, int flags, struct nfsd_file **nfp, struct nfs4_stid **cstid) { struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfs4_stid *s = NULL; __be32 status; if (nfp) *nfp = NULL; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { status = check_special_stateids(net, fhp, stateid, flags); goto done; } status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK, 0, &s, nn); if (status == nfserr_bad_stateid) status = find_cpntf_state(nn, stateid, &s); if (status) return status; status = nfsd4_stid_check_stateid_generation(stateid, s, nfsd4_has_session(cstate)); if (status) goto out; switch (s->sc_type) { case SC_TYPE_DELEG: status = nfs4_check_delegmode(delegstateid(s), flags); break; case SC_TYPE_OPEN: case SC_TYPE_LOCK: status = nfs4_check_olstateid(openlockstateid(s), flags); break; } if (status) goto out; status = nfs4_check_fh(fhp, s); done: if (status == nfs_ok && nfp) status = nfs4_check_file(rqstp, fhp, s, nfp, flags); out: if (s) { if (!status && cstid) *cstid = s; else nfs4_put_stid(s); } return status; } /* * Test if the stateid is valid */ __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->clp; list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); return nfs_ok; } static __be32 nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s) { struct nfs4_ol_stateid *stp = openlockstateid(s); __be32 ret; ret = nfsd4_lock_ol_stateid(stp); if (ret) goto out_put_stid; ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) goto out; ret = nfserr_locks_held; if (check_for_locks(stp->st_stid.sc_file, lockowner(stp->st_stateowner))) goto out; release_lock_stateid(stp); ret = nfs_ok; out: mutex_unlock(&stp->st_mutex); out_put_stid: nfs4_put_stid(s); return ret; } __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_free_stateid *free_stateid = &u->free_stateid; stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; struct nfs4_client *cl = cstate->clp; __be32 ret = nfserr_bad_stateid; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s || s->sc_status & SC_STATUS_CLOSED) goto out_unlock; if (s->sc_status & SC_STATUS_ADMIN_REVOKED) { nfsd4_drop_revoked_stid(s); ret = nfs_ok; goto out; } spin_lock(&s->sc_lock); switch (s->sc_type) { case SC_TYPE_DELEG: if (s->sc_status & SC_STATUS_REVOKED) { s->sc_status |= SC_STATUS_CLOSED; spin_unlock(&s->sc_lock); dp = delegstateid(s); if (s->sc_status & SC_STATUS_FREEABLE) list_del_init(&dp->dl_recall_lru); s->sc_status |= SC_STATUS_FREED; spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; goto out; } ret = nfserr_locks_held; break; case SC_TYPE_OPEN: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) break; ret = nfserr_locks_held; break; case SC_TYPE_LOCK: spin_unlock(&s->sc_lock); refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; } spin_unlock(&s->sc_lock); out_unlock: spin_unlock(&cl->cl_lock); out: return ret; } static inline int setlkflg (int 
type) { return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? RD_STATE : WR_STATE; } static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) { struct svc_fh *current_fh = &cstate->current_fh; struct nfs4_stateowner *sop = stp->st_stateowner; __be32 status; status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; status = nfsd4_lock_ol_stateid(stp); if (status != nfs_ok) return status; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok) status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok) mutex_unlock(&stp->st_mutex); return status; } /** * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op * @cstate: compund state * @seqid: seqid (provided by client) * @stateid: stateid (provided by client) * @typemask: mask of allowable types for this operation * @statusmask: mask of allowed states: 0 or STID_CLOSED * @stpp: return pointer for the stateid found * @nn: net namespace for request * * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and * return it in @stpp. On a nfs_ok return, the returned stateid will * have its st_mutex locked. */ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, unsigned short typemask, unsigned short statusmask, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_stid *s; struct nfs4_ol_stateid *stp = NULL; trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; retry: status = nfsd4_lookup_stateid(cstate, stateid, typemask, statusmask, &s, nn); if (status) return status; stp = openlockstateid(s); if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) { nfs4_put_stateowner(stp->st_stateowner); goto retry; } status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) *stpp = stp; else nfs4_put_stid(&stp->st_stid); return status; } static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, SC_TYPE_OPEN, 0, &stp, nn); if (status) return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } *stpp = stp; return nfs_ok; } __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_open_confirm *oc = &u->open_confirm; __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_confirm on file %pd\n", cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) return status; status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, SC_TYPE_OPEN, 0, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) { mutex_unlock(&stp->st_mutex); goto put_stateid; } oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid); 
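	/*
	 * The call below is where a v4.0 client becomes eligible to
	 * reclaim after a server reboot: the client-tracking code is
	 * asked to persist a record for this client, conceptually
	 *
	 *	nn->client_tracking_ops->create(clp);
	 *
	 * (a sketch; the actual dispatch is inside
	 * nfsd4_client_record_create()).
	 */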
nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); return status; } static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) { if (!test_access(access, stp)) return; nfs4_file_put_access(stp->st_stid.sc_file, access); clear_access(access, stp); } static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) { switch (to_access) { case NFS4_SHARE_ACCESS_READ: nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); break; case NFS4_SHARE_ACCESS_WRITE: nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); break; case NFS4_SHARE_ACCESS_BOTH: break; default: WARN_ON_ONCE(1); } } __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_open_downgrade *od = &u->open_downgrade; __be32 status; struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ if (od->od_deleg_want) dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, od->od_deleg_want); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; if (!test_access(od->od_share_access, stp)) { dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto put_stateid; } if (!test_deny(od->od_share_deny, stp)) { dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); reset_union_bmap_deny(od->od_share_deny, stp); nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); return status; } static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; LIST_HEAD(reaplist); struct nfs4_ol_stateid *stp; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); if (clp->cl_minorversion) { if (unhashed) put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); list_for_each_entry(stp, &reaplist, st_locks) nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); return false; } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); return unhashed; } } /* * nfs4_unlock_state() called after encode */ __be32 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_close *close = &u->close; __be32 status; struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool need_move_to_close_list; dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, SC_TYPE_OPEN, SC_STATUS_CLOSED, &stp, nn); nfsd4_bump_seqid(cstate, status); if (status) goto out; spin_lock(&stp->st_stid.sc_client->cl_lock); stp->st_stid.sc_status |= SC_STATUS_CLOSED; spin_unlock(&stp->st_stid.sc_client->cl_lock); /* * Technically we don't _really_ have to 
increment or copy it, since
 * it should just be gone after this operation and we clobber the
 * copied value below, but we continue to do so here just to ensure
 * that racing ops see that there was a state change.
 */
	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
	need_move_to_close_list = nfsd4_close_open_stateid(stp);
	mutex_unlock(&stp->st_mutex);
	if (need_move_to_close_list)
		move_to_close_lru(stp, net);

	/* v4.1+ suggests that we send a special stateid in here, since the
	 * clients should just ignore this anyway. Since this is not useful
	 * for v4.0 clients either, we set it to the special close_stateid
	 * universally.
	 *
	 * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
	 */
	memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));

	/* put reference from nfs4_preprocess_seqid_op */
	nfs4_put_stid(&stp->st_stid);
out:
	return status;
}

__be32
nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  union nfsd4_op_u *u)
{
	struct nfsd4_delegreturn *dr = &u->delegreturn;
	struct nfs4_delegation *dp;
	stateid_t *stateid = &dr->dr_stateid;
	struct nfs4_stid *s;
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	status = fh_verify(rqstp, &cstate->current_fh, 0, 0);
	if (status)
		return status;

	status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG,
				      SC_STATUS_REVOKED, &s, nn);
	if (status)
		goto out;
	dp = delegstateid(s);
	status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid,
						     nfsd4_has_session(cstate));
	if (status)
		goto put_stateid;

	trace_nfsd_deleg_return(stateid);
	destroy_delegation(dp);
	smp_mb__after_atomic();
	wake_up_var(d_inode(cstate->current_fh.fh_dentry));

put_stateid:
	nfs4_put_stid(&dp->dl_stid);
out:
	return status;
}

/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
	u64 end;

	WARN_ON_ONCE(!len);
	end = start + len;
	return end > start ? end - 1 : NFS4_MAX_UINT64;
}

/*
 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
 * locking, this prevents us from being completely protocol-compliant.  The
 * real solution to this problem is to start using unsigned file offsets in
 * the VFS, but this is a very deep change!
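 *
 * Worked examples for last_byte_offset() above (unsigned 64-bit
 * arithmetic, so the second case wraps around deliberately):
 *
 *	last_byte_offset(100, 50)            == 149
 *	last_byte_offset(5, NFS4_MAX_UINT64) == NFS4_MAX_UINT64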
*/ static inline void nfs4_transform_lock_offset(struct file_lock *lock) { if (lock->fl_start < 0) lock->fl_start = OFFSET_MAX; if (lock->fl_end < 0) lock->fl_end = OFFSET_MAX; } static fl_owner_t nfsd4_lm_get_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; nfs4_get_stateowner(&lo->lo_owner); return owner; } static void nfsd4_lm_put_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; if (lo) nfs4_put_stateowner(&lo->lo_owner); } /* return pointer to struct nfs4_client if client is expirable */ static bool nfsd4_lm_lock_expirable(struct file_lock *cfl) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *) cfl->c.flc_owner; struct nfs4_client *clp = lo->lo_owner.so_client; struct nfsd_net *nn; if (try_to_expire_client(clp)) { nn = net_generic(clp->net, nfsd_net_id); mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); return true; } return false; } /* schedule laundromat to run immediately and wait for it to complete */ static void nfsd4_lm_expire_lock(void) { flush_workqueue(laundry_wq); } static void nfsd4_lm_notify(struct file_lock *fl) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *) fl->c.flc_owner; struct net *net = lo->lo_owner.so_client->net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd4_blocked_lock *nbl = container_of(fl, struct nfsd4_blocked_lock, nbl_lock); bool queue = false; /* An empty list means that something else is going to be using it */ spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); queue = true; } spin_unlock(&nn->blocked_locks_lock); if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); nfsd4_try_run_cb(&nbl->nbl_cb); } } static const struct lock_manager_operations nfsd_posix_mng_ops = { .lm_mod_owner = THIS_MODULE, .lm_notify = nfsd4_lm_notify, .lm_get_owner = nfsd4_lm_get_owner, .lm_put_owner = nfsd4_lm_put_owner, .lm_lock_expirable = nfsd4_lm_lock_expirable, .lm_expire_lock = nfsd4_lm_expire_lock, }; static inline void nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) { struct nfs4_lockowner *lo; if (fl->fl_lmops == &nfsd_posix_mng_ops) { lo = (struct nfs4_lockowner *) fl->c.flc_owner; xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, GFP_KERNEL); if (!deny->ld_owner.data) /* We just don't care that much */ goto nevermind; deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { nevermind: deny->ld_owner.len = 0; deny->ld_owner.data = NULL; deny->ld_clientid.cl_boot = 0; deny->ld_clientid.cl_id = 0; } deny->ld_start = fl->fl_start; deny->ld_length = NFS4_MAX_UINT64; if (fl->fl_end != NFS4_MAX_UINT64) deny->ld_length = fl->fl_end - fl->fl_start + 1; deny->ld_type = NFS4_READ_LT; if (fl->c.flc_type != F_RDLCK) deny->ld_type = NFS4_WRITE_LT; } static struct nfs4_lockowner * find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner) { unsigned int strhashval = ownerstr_hashval(owner); struct nfs4_stateowner *so; lockdep_assert_held(&clp->cl_lock); list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], so_strhash) { if (so->so_is_open_owner) continue; if (same_owner_str(so, owner)) return lockowner(nfs4_get_stateowner(so)); } return NULL; } static struct nfs4_lockowner * find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner) { struct nfs4_lockowner *lo; spin_lock(&clp->cl_lock); lo = find_lockowner_str_locked(clp, owner); spin_unlock(&clp->cl_lock); return lo; } static void nfs4_unhash_lockowner(struct 
nfs4_stateowner *sop) { unhash_lockowner_locked(lockowner(sop)); } static void nfs4_free_lockowner(struct nfs4_stateowner *sop) { struct nfs4_lockowner *lo = lockowner(sop); kmem_cache_free(lockowner_slab, lo); } static const struct nfs4_stateowner_operations lockowner_ops = { .so_unhash = nfs4_unhash_lockowner, .so_free = nfs4_free_lockowner, }; /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has * occurred. * * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) return NULL; INIT_LIST_HEAD(&lo->lo_blocked); INIT_LIST_HEAD(&lo->lo_owner.so_stateids); lo->lo_owner.so_is_open_owner = 0; lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; spin_lock(&clp->cl_lock); ret = find_lockowner_str_locked(clp, &lock->lk_new_owner); if (ret == NULL) { list_add(&lo->lo_owner.so_strhash, &clp->cl_ownerstr_hashtbl[strhashval]); ret = lo; } else nfs4_free_stateowner(&lo->lo_owner); spin_unlock(&clp->cl_lock); return ret; } static struct nfs4_ol_stateid * find_lock_stateid(const struct nfs4_lockowner *lo, const struct nfs4_ol_stateid *ost) { struct nfs4_ol_stateid *lst; lockdep_assert_held(&ost->st_stid.sc_client->cl_lock); /* If ost is not hashed, ost->st_locks will not be valid */ if (!nfs4_ol_stateid_unhashed(ost)) list_for_each_entry(lst, &ost->st_locks, st_locks) { if (lst->st_stateowner == &lo->lo_owner) { refcount_inc(&lst->st_stid.sc_count); return lst; } } return NULL; } static struct nfs4_ol_stateid * init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, struct nfs4_ol_stateid *open_stp) { struct nfs4_client *clp = lo->lo_owner.so_client; struct nfs4_ol_stateid *retstp; mutex_init(&stp->st_mutex); mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); if (nfs4_ol_stateid_unhashed(open_stp)) goto out_close; retstp = find_lock_stateid(lo, open_stp); if (retstp) goto out_found; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = SC_TYPE_LOCK; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; spin_lock(&fp->fi_lock); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); return stp; out_found: spin_unlock(&clp->cl_lock); if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { nfs4_put_stid(&retstp->st_stid); goto retry; } /* To keep mutex tracking happy */ mutex_unlock(&stp->st_mutex); return retstp; out_close: spin_unlock(&clp->cl_lock); mutex_unlock(&stp->st_mutex); return NULL; } static struct nfs4_ol_stateid * find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, struct inode *inode, struct nfs4_ol_stateid *ost, bool *new) { struct nfs4_stid *ns = NULL; struct nfs4_ol_stateid *lst; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *clp = oo->oo_owner.so_client; *new = false; spin_lock(&clp->cl_lock); lst = find_lock_stateid(lo, ost); spin_unlock(&clp->cl_lock); if (lst != NULL) { if (nfsd4_lock_ol_stateid(lst) == 
nfs_ok) goto out; nfs4_put_stid(&lst->st_stid); } ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost); if (lst == openlockstateid(ns)) *new = true; else nfs4_put_stid(ns); out: return lst; } static int check_lock_length(u64 offset, u64 length) { return ((length == 0) || ((length != NFS4_MAX_UINT64) && (length > ~offset))); } static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { struct nfs4_file *fp = lock_stp->st_stid.sc_file; lockdep_assert_held(&fp->fi_lock); if (test_access(access, lock_stp)) return; __nfs4_file_get_access(fp, access); set_access(access, lock_stp); } static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **plst, bool *new) { __be32 status; struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; struct inode *inode = d_inode(cstate->current_fh.fh_dentry); struct nfs4_lockowner *lo; struct nfs4_ol_stateid *lst; unsigned int strhashval; lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) { strhashval = ownerstr_hashval(&lock->lk_new_owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; } else { /* with an existing lockowner, seqids must be the same */ status = nfserr_bad_seqid; if (!cstate->minorversion && lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) goto out; } lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (lst == NULL) { status = nfserr_jukebox; goto out; } status = nfs_ok; *plst = lst; out: nfs4_put_stateowner(&lo->lo_owner); return status; } /* * LOCK operation */ __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_lock *lock = &u->lock; struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; struct nfs4_ol_stateid *lock_stp = NULL; struct nfs4_ol_stateid *open_stp = NULL; struct nfs4_file *fp; struct nfsd_file *nf = NULL; struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; __be32 status = 0; int lkflg; int err; bool new = false; unsigned char type; unsigned int flags = FL_POSIX; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, (long long) lock->lk_length); if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status != nfs_ok) return status; if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { status = nfserr_notsupp; goto out; } if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->lk_new_clientid, &cstate->clp->cl_clientid, sizeof(clientid_t)); /* validate and update open stateid and open seqid */ status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, &open_stp, nn); if (status) goto out; mutex_unlock(&open_stp->st_mutex); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, &lock->lk_new_clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); } else { 
status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, SC_TYPE_LOCK, 0, &lock_stp, nn); } if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); if (status) goto out; status = nfserr_grace; if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; if (!locks_in_grace(net) && lock->lk_reclaim) goto out; if (lock->lk_reclaim) flags |= FL_RECLAIM; fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); nf = find_readable_file_locked(fp); if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); spin_unlock(&fp->fi_lock); type = F_RDLCK; break; case NFS4_WRITEW_LT: fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); nf = find_writeable_file_locked(fp); if (nf) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); spin_unlock(&fp->fi_lock); type = F_WRLCK; break; default: status = nfserr_inval; goto out; } if (!nf) { status = nfserr_openmode; goto out; } if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) && nfsd4_has_session(cstate) && locks_can_async_lock(nf->nf_file->f_op)) flags |= FL_SLEEP; nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); status = nfserr_jukebox; goto out; } file_lock = &nbl->nbl_lock; file_lock->c.flc_type = type; file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); file_lock->c.flc_pid = current->tgid; file_lock->c.flc_file = nf->nf_file; file_lock->c.flc_flags = flags; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = lock->lk_offset; file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); nfs4_transform_lock_offset(file_lock); conflock = locks_alloc_lock(); if (!conflock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto out; } if (flags & FL_SLEEP) { nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock); switch (err) { case 0: /* success! */ nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; if (lock->lk_reclaim) nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ status = nfserr_denied; dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); nfs4_set_lock_denied(conflock, &lock->lk_denied); break; case -EDEADLK: status = nfserr_deadlock; break; default: dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! 
status %d\n",err); status = nfserrno(err); break; } out: if (nbl) { /* dequeue it if we queued it before */ if (flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list) && !list_empty(&nbl->nbl_lru)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); kref_put(&nbl->nbl_kref, free_nbl); } /* nbl can use one of lists to be linked to reaplist */ spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); } if (nf) nfsd_file_put(nf); if (lock_stp) { /* Bump seqid manually if the 4.0 replay owner is openowner */ if (cstate->replay_owner && cstate->replay_owner != &lock_sop->lo_owner && seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. */ if (status && new) release_lock_stateid(lock_stp); mutex_unlock(&lock_stp->st_mutex); nfs4_put_stid(&lock_stp->st_stid); } if (open_stp) nfs4_put_stid(&open_stp->st_stid); nfsd4_bump_seqid(cstate, status); if (conflock) locks_free_lock(conflock); return status; } void nfsd4_lock_release(union nfsd4_op_u *u) { struct nfsd4_lock *lock = &u->lock; struct nfsd4_lock_denied *deny = &lock->lk_denied; kfree(deny->ld_owner.data); } /* * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, * so we do a temporary open here just to get an open file to pass to * vfs_test_lock. */ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; inode = fhp->fh_dentry->d_inode; inode_lock(inode); /* to block new leases till after test_lock: */ err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; lock->c.flc_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->c.flc_file = NULL; out: inode_unlock(inode); nfsd_file_put(nf); return err; } /* * LOCKT operation */ __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_lockt *lockt = &u->lockt; struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; if (!nfsd4_has_session(cstate)) { status = set_client(&lockt->lt_clientid, cstate, nn); if (status) goto out; } if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto out; } switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: file_lock->c.flc_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: file_lock->c.flc_type = F_WRLCK; break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); status = nfserr_inval; goto out; } lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); if (lo) file_lock->c.flc_owner = (fl_owner_t)lo; file_lock->c.flc_pid = current->tgid; file_lock->c.flc_flags = FL_POSIX; file_lock->fl_start = lockt->lt_offset; file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); nfs4_transform_lock_offset(file_lock); status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock); if (status) goto out; if (file_lock->c.flc_type != F_UNLCK) { status = nfserr_denied; 
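		/*
		 * Worked example of the mapping nfs4_set_lock_denied()
		 * performs here: a conflicting F_WRLCK over bytes 0-99
		 * (fl_start 0, fl_end 99) is reported back as ld_start 0,
		 * ld_length 100 and ld_type NFS4_WRITE_LT.
		 */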
nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: if (lo) nfs4_put_stateowner(&lo->lo_owner); if (file_lock) locks_free_lock(file_lock); return status; } void nfsd4_lockt_release(union nfsd4_op_u *u) { struct nfsd4_lockt *lockt = &u->lockt; struct nfsd4_lock_denied *deny = &lockt->lt_denied; kfree(deny->ld_owner.data); } __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_locku *locku = &u->locku; struct nfs4_ol_stateid *stp; struct nfsd_file *nf = NULL; struct file_lock *file_lock = NULL; __be32 status; int err; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, SC_TYPE_LOCK, 0, &stp, nn); if (status) goto out; nf = find_any_file(stp->st_stid.sc_file); if (!nf) { status = nfserr_lock_range; goto put_stateid; } if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { status = nfserr_notsupp; goto put_file; } file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; goto put_file; } file_lock->c.flc_type = F_UNLCK; file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); file_lock->c.flc_pid = current->tgid; file_lock->c.flc_file = nf->nf_file; file_lock->c.flc_flags = FL_POSIX; file_lock->fl_lmops = &nfsd_posix_mng_ops; file_lock->fl_start = locku->lu_offset; file_lock->fl_end = last_byte_offset(locku->lu_offset, locku->lu_length); nfs4_transform_lock_offset(file_lock); err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); put_file: nfsd_file_put(nf); put_stateid: mutex_unlock(&stp->st_mutex); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); if (file_lock) locks_free_lock(file_lock); return status; out_nfserr: status = nfserrno(err); goto put_file; } /* * returns * true: locks held by lockowner * false: no locks held by lockowner */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx; spin_lock(&fp->fi_lock); nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); goto out; } inode = file_inode(nf->nf_file); flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); for_each_file_lock(fl, &flctx->flc_posix) { if (fl->c.flc_owner == (fl_owner_t)lowner) { status = true; break; } } spin_unlock(&flctx->flc_lock); } out: spin_unlock(&fp->fi_lock); return status; } /** * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations * @rqstp: RPC transaction * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * * Check if there are any locks still held and if not, free the lockowner * and any lock state that is owned. 
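 *
 * The "still held" test is the walk in check_for_locks() above; in
 * outline (a sketch of that loop, not new code):
 *
 *	for_each_file_lock(fl, &flctx->flc_posix)
 *		if (fl->c.flc_owner == (fl_owner_t)lowner)
 *			return true;	// => nfserr_locks_held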
* * Return values: * %nfs_ok: lockowner released or not found * %nfserr_locks_held: lockowner still in use * %nfserr_stale_clientid: clientid no longer active * %nfserr_expired: clientid not recognized */ __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_ol_stateid *stp; struct nfs4_lockowner *lo; struct nfs4_client *clp; LIST_HEAD(reaplist); __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); status = set_client(clid, cstate, nn); if (status) return status; clp = cstate->clp; spin_lock(&clp->cl_lock); lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); if (!lo) { spin_unlock(&clp->cl_lock); return nfs_ok; } list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { if (check_for_locks(stp->st_stid.sc_file, lo)) { spin_unlock(&clp->cl_lock); nfs4_put_stateowner(&lo->lo_owner); return nfserr_locks_held; } } unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); unhash_lock_stateid(stp); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); return nfs_ok; } static inline struct nfs4_client_reclaim * alloc_reclaim(void) { return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client_reclaim *crp; crp = nfsd4_find_reclaim_client(name, nn); return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... 
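 *
 * Sketch of the intended use (hypothetical caller in the client
 * tracking code, loading one record from stable storage):
 *
 *	if (!nfs4_client_to_reclaim(name, princhash, nn))
 *		return -ENOMEM;	// record lost; client cannot reclaim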
 */
struct nfs4_client_reclaim *
nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
		struct nfsd_net *nn)
{
	unsigned int strhashval;
	struct nfs4_client_reclaim *crp;

	name.data = kmemdup(name.data, name.len, GFP_KERNEL);
	if (!name.data) {
		dprintk("%s: failed to allocate memory for name.data!\n",
			__func__);
		return NULL;
	}
	if (princhash.len) {
		princhash.data = kmemdup(princhash.data, princhash.len,
					 GFP_KERNEL);
		if (!princhash.data) {
			dprintk("%s: failed to allocate memory for princhash.data!\n",
				__func__);
			kfree(name.data);
			return NULL;
		}
	} else
		princhash.data = NULL;

	crp = alloc_reclaim();
	if (crp) {
		strhashval = clientstr_hashval(name);
		INIT_LIST_HEAD(&crp->cr_strhash);
		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
		crp->cr_name.data = name.data;
		crp->cr_name.len = name.len;
		crp->cr_princhash.data = princhash.data;
		crp->cr_princhash.len = princhash.len;
		crp->cr_clp = NULL;
		nn->reclaim_str_hashtbl_size++;
	} else {
		kfree(name.data);
		kfree(princhash.data);
	}
	return crp;
}

void
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
	list_del(&crp->cr_strhash);
	kfree(crp->cr_name.data);
	kfree(crp->cr_princhash.data);
	kfree(crp);
	nn->reclaim_str_hashtbl_size--;
}

void
nfs4_release_reclaim(struct nfsd_net *nn)
{
	struct nfs4_client_reclaim *crp = NULL;
	int i;

	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
					struct nfs4_client_reclaim, cr_strhash);
			nfs4_remove_reclaim_record(crp, nn);
		}
	}
	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
}

/*
 * called from OPEN, CLAIM_PREVIOUS with a new clientid.
 */
struct nfs4_client_reclaim *
nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
{
	unsigned int strhashval;
	struct nfs4_client_reclaim *crp = NULL;

	strhashval = clientstr_hashval(name);
	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval],
			    cr_strhash) {
		if (compare_blob(&crp->cr_name, &name) == 0) {
			return crp;
		}
	}
	return NULL;
}

__be32
nfs4_check_open_reclaim(struct nfs4_client *clp)
{
	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
		return nfserr_no_grace;
	if (nfsd4_client_record_check(clp))
		return nfserr_reclaim_bad;
	return nfs_ok;
}

/*
 * Since the lifetime of a delegation isn't limited to that of an open, a
 * client may quite reasonably hang on to a delegation as long as it has
 * the inode cached.  This becomes an obvious problem the first time a
 * client's inode cache approaches the size of the server's total memory.
 *
 * For now we avoid this problem by imposing a hard limit on the number
 * of delegations, which varies according to the server's memory size.
 */
static void
set_max_delegations(void)
{
	/*
	 * Allow at most 4 delegations per megabyte of RAM.  Quick
	 * estimates suggest that in the worst case (where every delegation
	 * is for a different inode), a delegation could take about 1.5K,
	 * giving a worst case usage of about 6% of memory.
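 *
 * Worked example (assuming PAGE_SHIFT == 12, i.e. 4K pages): the
 * shift below is 20 - 2 - 12 = 6, so one megabyte (256 pages) maps
 * to 256 >> 6 = 4 delegations, which is the intended 4 per MB.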
*/ max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } static int nfs4_state_create_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i; nn->conf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->conf_id_hashtbl) goto err; nn->unconf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id; nn->sessionid_hashtbl = kmalloc_array(SESSION_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); if (!nn->sessionid_hashtbl) goto err_sessionid; for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); } for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; nn->boot_time = ktime_get_real_seconds(); nn->grace_ended = false; nn->grace_end_forced = false; nn->nfsd4_manager.block_opens = true; INIT_LIST_HEAD(&nn->nfsd4_manager.list); INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); spin_lock_init(&nn->s2s_cp_lock); idr_init(&nn->s2s_cp_stateids); atomic_set(&nn->pending_async_copies, 0); spin_lock_init(&nn->blocked_locks_lock); INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); /* Make sure this cannot run until client tracking is initialised */ disable_delayed_work(&nn->laundromat_work); INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client"); if (!nn->nfsd_client_shrinker) goto err_shrinker; nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan; nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count; nn->nfsd_client_shrinker->private_data = nn; shrinker_register(nn->nfsd_client_shrinker); return 0; err_shrinker: put_net(net); kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: kfree(nn->conf_id_hashtbl); err: return -ENOMEM; } static void nfs4_state_destroy_net(struct net *net) { int i; struct nfs4_client *clp = NULL; struct nfsd_net *nn = net_generic(net, nfsd_net_id); for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->conf_id_hashtbl[i])) { clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); destroy_client(clp); } } WARN_ON(!list_empty(&nn->blocked_locks_lru)); for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->unconf_id_hashtbl[i])) { clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); destroy_client(clp); } } kfree(nn->sessionid_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); put_net(net); } int nfs4_state_start_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; ret = nfs4_state_create_net(net); if (ret) return ret; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); /* safe for laundromat to run now */ enable_delayed_work(&nn->laundromat_work); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) goto skip_grace; printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); trace_nfsd_grace_start(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; skip_grace: printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n", 
net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ); nfsd4_end_grace(nn); return 0; } /* initialization to perform when the nfsd service is started: */ int nfs4_state_start(void) { int ret; ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); if (ret) return ret; nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); if (!nfsd_slot_shrinker) { rhltable_destroy(&nfs4_file_rhltable); return -ENOMEM; } nfsd_slot_shrinker->count_objects = nfsd_slot_count; nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; shrinker_register(nfsd_slot_shrinker); set_max_delegations(); return 0; } void nfs4_state_shutdown_net(struct net *net) { struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); shrinker_free(nn->nfsd_client_shrinker); cancel_work_sync(&nn->nfsd_shrinker_work); disable_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); unhash_delegation_locked(dp, SC_STATUS_CLOSED); list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); destroy_unhashed_deleg(dp); } nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif } void nfs4_state_shutdown(void) { rhltable_destroy(&nfs4_file_rhltable); shrinker_free(nfsd_slot_shrinker); } static void get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); } static void put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (cstate->minorversion) { memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } } void clear_current_stateid(struct nfsd4_compound_state *cstate) { CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } /* * functions to set current state id */ void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->open_downgrade.od_stateid); } void nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->open.op_stateid); } void nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->close.cl_stateid); } void nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { put_stateid(cstate, &u->lock.lk_resp_stateid); } /* * functions to consume current state id */ void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->open_downgrade.od_stateid); } void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->delegreturn.dr_stateid); } void nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->free_stateid.fr_stateid); } void nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->setattr.sa_stateid); } void nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { 
get_stateid(cstate, &u->close.cl_stateid); } void nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->locku.lu_stateid); } void nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->read.rd_stateid); } void nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { get_stateid(cstate, &u->write.wr_stateid); } /** * nfsd4_vet_deleg_time - vet and set the timespec for a delegated timestamp update * @req: timestamp from the client * @orig: original timestamp in the inode * @now: current time * * Given a timestamp from the client response, check it against the * current timestamp in the inode and the current time. Returns true * if the inode's timestamp needs to be updated, and false otherwise. * @req may also be changed if the timestamp needs to be clamped. */ bool nfsd4_vet_deleg_time(struct timespec64 *req, const struct timespec64 *orig, const struct timespec64 *now) { /* * "When the time presented is before the original time, then the * update is ignored." Also no need to update if there is no change. */ if (timespec64_compare(req, orig) <= 0) return false; /* * "When the time presented is in the future, the server can either * clamp the new time to the current time, or it may * return NFS4ERR_DELAY to the client, allowing it to retry." */ if (timespec64_compare(req, now) > 0) *req = *now; return true; } static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp) { struct inode *inode = d_inode(dentry); struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; struct iattr attrs = { }; int ret; if (deleg_attrs_deleg(dp->dl_type)) { struct timespec64 now = current_time(inode); attrs.ia_atime = ncf->ncf_cb_atime; attrs.ia_mtime = ncf->ncf_cb_mtime; if (nfsd4_vet_deleg_time(&attrs.ia_atime, &dp->dl_atime, &now)) attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; if (nfsd4_vet_deleg_time(&attrs.ia_mtime, &dp->dl_mtime, &now)) { attrs.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; attrs.ia_ctime = attrs.ia_mtime; if (nfsd4_vet_deleg_time(&attrs.ia_ctime, &dp->dl_ctime, &now)) attrs.ia_valid |= ATTR_CTIME | ATTR_CTIME_SET; } } else { attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME; } if (!attrs.ia_valid) return 0; attrs.ia_valid |= ATTR_DELEG; inode_lock(inode); ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); inode_unlock(inode); return ret; } /** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @dentry: dentry of inode to be checked for a conflict * @pdp: returned WRITE delegation, if one was found * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server * must either use the CB_GETATTR to get the current values of the * attributes from the client that holds the delegation or recall the * delegation before replying to the GETATTR. See RFC 8881 section * 18.7.4. * * Returns 0 if there is no conflict; otherwise an nfs_stat * code is returned. If @pdp is set to a non-NULL value, then the * caller must put the reference. 
*/ __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_delegation **pdp) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; struct nfs4_delegation *dp = NULL; struct file_lease *fl; struct nfs4_cb_fattr *ncf; struct inode *inode = d_inode(dentry); ctx = locks_inode_context(inode); if (!ctx) return nfs_ok; #define NON_NFSD_LEASE ((void *)1) spin_lock(&ctx->flc_lock); for_each_file_lock(fl, &ctx->flc_lease) { if (fl->c.flc_flags == FL_LAYOUT) continue; if (fl->c.flc_type == F_WRLCK) { if (fl->fl_lmops == &nfsd_lease_mng_ops) dp = fl->c.flc_owner; else dp = NON_NFSD_LEASE; } break; } if (dp == NULL || dp == NON_NFSD_LEASE || dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { spin_unlock(&ctx->flc_lock); if (dp == NON_NFSD_LEASE) { status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) return status; } return 0; } nfsd_stats_wdeleg_getattr_inc(nn); refcount_inc(&dp->dl_stid.sc_count); ncf = &dp->dl_cb_fattr; nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) goto out_status; } if (!ncf->ncf_file_modified && (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) ncf->ncf_file_modified = true; if (ncf->ncf_file_modified) { int err; /* * Per section 10.4.3 of RFC 8881, the server would * not update the file's metadata with the client's * modified size */ err = cb_getattr_update_times(dentry, dp); if (err) { status = nfserrno(err); goto out_status; } ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; *pdp = dp; return nfs_ok; } status = nfs_ok; out_status: nfs4_put_stid(&dp->dl_stid); return status; } /** * nfsd_get_dir_deleg - attempt to get a directory delegation * @cstate: compound state * @gdd: GET_DIR_DELEGATION arg/resp structure * @nf: nfsd_file opened on the directory * * Given a GET_DIR_DELEGATION request @gdd, attempt to acquire a delegation * on the directory to which @nf refers. Note that this does not set up any * sort of async notifications for the delegation. */ struct nfs4_delegation * nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, struct nfsd4_get_dir_delegation *gdd, struct nfsd_file *nf) { struct nfs4_client *clp = cstate->clp; struct nfs4_delegation *dp; struct file_lease *fl; struct nfs4_file *fp, *rfp; int status = 0; fp = nfsd4_alloc_file(); if (!fp) return ERR_PTR(-ENOMEM); nfsd4_file_init(&cstate->current_fh, fp); rfp = nfsd4_file_hash_insert(fp, &cstate->current_fh); if (unlikely(!rfp)) { put_nfs4_file(fp); return ERR_PTR(-ENOMEM); } if (rfp != fp) { put_nfs4_file(fp); fp = rfp; } /* if this client already has one, return that it's unavailable */ spin_lock(&state_lock); spin_lock(&fp->fi_lock); /* existing delegation? 
*/ if (nfs4_delegation_exists(clp, fp)) { status = -EAGAIN; } else if (!fp->fi_deleg_file) { fp->fi_deleg_file = nfsd_file_get(nf); fp->fi_delegees = 1; } else { ++fp->fi_delegees; } spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); if (status) { put_nfs4_file(fp); return ERR_PTR(status); } /* Try to set up the lease */ status = -ENOMEM; dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ); if (!dp) goto out_delegees; fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_put_stid; status = kernel_setlease(nf->nf_file, fl->c.flc_type, &fl, NULL); if (fl) locks_free_lease(fl); if (status) goto out_put_stid; /* * Now, try to hash it. This can fail if we race another nfsd task * trying to set a delegation on the same file. If that happens, * then just say UNAVAIL. */ spin_lock(&state_lock); spin_lock(&clp->cl_lock); spin_lock(&fp->fi_lock); status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); spin_unlock(&state_lock); if (!status) { put_nfs4_file(fp); return dp; } /* Something failed. Drop the lease and clean up the stid */ kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp); out_put_stid: nfs4_put_stid(&dp->dl_stid); out_delegees: put_deleg_file(fp); put_nfs4_file(fp); return ERR_PTR(status); }
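The timestamp-vetting rule implemented by nfsd4_vet_deleg_time() above is easy to exercise outside the kernel. Below is a minimal userspace sketch, assuming plain struct timespec in place of the kernel's timespec64; the helper names are illustrative and not part of nfsd.

/*
 * Illustrative userspace sketch of the nfsd4_vet_deleg_time() rule above:
 * a client-supplied timestamp is ignored if it does not move time forward,
 * and clamped to "now" if it lies in the future. Plain struct timespec
 * stands in for the kernel's timespec64; this is not nfsd code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int ts_cmp(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec < b->tv_nsec ? -1 : 1;
	return 0;
}

/* Returns true if the stored timestamp should be updated to *req. */
static bool vet_deleg_time(struct timespec *req, const struct timespec *orig,
			   const struct timespec *now)
{
	if (ts_cmp(req, orig) <= 0)	/* stale or unchanged: ignore */
		return false;
	if (ts_cmp(req, now) > 0)	/* future time: clamp to now */
		*req = *now;
	return true;
}

int main(void)
{
	struct timespec orig = { .tv_sec = 100 }, now = { .tv_sec = 200 };
	struct timespec past = { .tv_sec = 50 }, future = { .tv_sec = 300 };
	bool upd;

	printf("past:   update=%d\n", vet_deleg_time(&past, &orig, &now));
	upd = vet_deleg_time(&future, &orig, &now);
	printf("future: update=%d (clamped to %ld)\n", upd, (long)future.tv_sec);
	return 0;
}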
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NODEMASK_H #define __LINUX_NODEMASK_H /* * Nodemasks provide a bitmap suitable for representing the * set of Node's in a system, one bit position per Node number. * * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * * For details of nodemask_parse_user(), see bitmap_parse_user() in * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in * lib/bitmap.c. * * The available nodemask operations are: * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * * int nodes_equal(mask1, mask2) Does mask1 == mask2?
* int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits set)? * int nodes_full(mask) Is mask full (all bits set)? * int nodes_weight(mask) Hamming weight - number of set bits * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigned int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem.
If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_copy(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_copy(dstp->bits, srcp->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) 
__nodes_empty(&(src), MAX_NUMNODES) static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min()s could be dropped. */ #define first_node(src) __first_node(&(src)) static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min(MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min(MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); return ret; } static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min(MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... 
BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. 
*/ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static __always_inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static __always_inline int node_state(int node, enum node_states state) { return node == 0; } static __always_inline void node_set_state(int node, enum node_states state) { } static __always_inline void node_clear_state(int node, enum node_states state) { } static __always_inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int node = find_random_bit(maskp->bits, MAX_NUMNODES); return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif } #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) #define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU) /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* Example structure for using NODEMASK_ALLOC, used in mempolicy. 
*/ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */
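The header above is mostly thin wrappers over the bitmap API, so the core pattern, one bit per node in an array of unsigned longs plus a for_each iterator built on a find-next-bit primitive, can be sketched in a few lines of ordinary C. This is a hedged stand-in that mirrors the kernel API in spirit only; MAX_NUMNODES and the helper names below are local to the example.

/*
 * Minimal userspace sketch of the nodemask idea above: one bit per node
 * in an array of unsigned longs, with set/test helpers and a for_each
 * iterator. MAX_NUMNODES is an arbitrary stand-in; these helpers mirror
 * the kernel API in spirit only.
 */
#include <limits.h>
#include <stdio.h>
#include <string.h>

#define MAX_NUMNODES	64
#define BITS_PER_LONG	(CHAR_BIT * sizeof(unsigned long))
#define LONGS_NEEDED	((MAX_NUMNODES + BITS_PER_LONG - 1) / BITS_PER_LONG)

typedef struct { unsigned long bits[LONGS_NEEDED]; } nodemask_t;

static void node_set(int node, nodemask_t *mask)
{
	mask->bits[node / BITS_PER_LONG] |= 1UL << (node % BITS_PER_LONG);
}

static int node_isset(int node, const nodemask_t *mask)
{
	return !!((mask->bits[node / BITS_PER_LONG] >> (node % BITS_PER_LONG)) & 1);
}

/* Find the next set bit at or after 'node', or MAX_NUMNODES if none. */
static int next_node_ge(int node, const nodemask_t *mask)
{
	while (node < MAX_NUMNODES && !node_isset(node, mask))
		node++;
	return node;
}

#define for_each_node_mask(node, mask)				\
	for ((node) = next_node_ge(0, &(mask));			\
	     (node) < MAX_NUMNODES;				\
	     (node) = next_node_ge((node) + 1, &(mask)))

int main(void)
{
	nodemask_t mask;
	int node;

	memset(&mask, 0, sizeof(mask));
	node_set(3, &mask);
	node_set(42, &mask);
	for_each_node_mask(node, mask)
		printf("node %d is set\n", node);
	return 0;
}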
// SPDX-License-Identifier: GPL-2.0-only /* * Line 6 Linux USB driver * * Copyright (C) 2004-2010 Markus Grabner (line6@grabner-graz.at) */ #include <linux/slab.h> #include <linux/wait.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/usb.h> #include <sound/core.h> #include <sound/control.h> #include "capture.h" #include "driver.h" #include "playback.h" /* Locate name in binary program dump */ #define POD_NAME_OFFSET 0 #define POD_NAME_LENGTH 16 /* Other constants */ #define POD_CONTROL_SIZE 0x80 #define POD_BUFSIZE_DUMPREQ 7 #define POD_STARTUP_DELAY 1000 /* Stages of POD startup procedure */ enum { POD_STARTUP_VERSIONREQ, POD_STARTUP_SETUP, POD_STARTUP_DONE, }; enum { LINE6_BASSPODXT, LINE6_BASSPODXTLIVE, LINE6_BASSPODXTPRO, LINE6_POCKETPOD, LINE6_PODXT, LINE6_PODXTLIVE_POD, LINE6_PODXTPRO, }; struct usb_line6_pod { /* Generic Line 6 USB data */ struct usb_line6 line6; /* Instrument monitor level */ int monitor_level; /* Current progress in startup procedure */ int startup_progress; /* Serial number of device */ u32 serial_number; /* Firmware version (x 100) */ int firmware_version; /* Device ID */ int device_id; }; #define line6_to_pod(x) container_of(x, struct usb_line6_pod, line6) #define POD_SYSEX_CODE 3 /* *INDENT-OFF* */ enum { POD_SYSEX_SAVE = 0x24, POD_SYSEX_SYSTEM = 0x56, POD_SYSEX_SYSTEMREQ = 0x57, /* POD_SYSEX_UPDATE = 0x6c, */ /* software update!
*/ POD_SYSEX_STORE = 0x71, POD_SYSEX_FINISH = 0x72, POD_SYSEX_DUMPMEM = 0x73, POD_SYSEX_DUMP = 0x74, POD_SYSEX_DUMPREQ = 0x75 /* dumps entire internal memory of PODxt Pro */ /* POD_SYSEX_DUMPMEM2 = 0x76 */ }; enum { POD_MONITOR_LEVEL = 0x04, POD_SYSTEM_INVALID = 0x10000 }; /* *INDENT-ON* */ enum { POD_DUMP_MEMORY = 2 }; enum { POD_BUSY_READ, POD_BUSY_WRITE, POD_CHANNEL_DIRTY, POD_SAVE_PRESSED, POD_BUSY_MIDISEND }; static const struct snd_ratden pod_ratden = { .num_min = 78125, .num_max = 78125, .num_step = 1, .den = 2 }; static struct line6_pcm_properties pod_pcm_properties = { .playback_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_KNOT, .rate_min = 39062, .rate_max = 39063, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 8192, .periods_min = 1, .periods_max = 1024}, .capture_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_SYNC_START), .formats = SNDRV_PCM_FMTBIT_S24_3LE, .rates = SNDRV_PCM_RATE_KNOT, .rate_min = 39062, .rate_max = 39063, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 60000, .period_bytes_min = 64, .period_bytes_max = 8192, .periods_min = 1, .periods_max = 1024}, .rates = { .nrats = 1, .rats = &pod_ratden}, .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ }; static const char pod_version_header[] = { 0xf0, 0x7e, 0x7f, 0x06, 0x02 }; static char *pod_alloc_sysex_buffer(struct usb_line6_pod *pod, int code, int size) { return line6_alloc_sysex_buffer(&pod->line6, POD_SYSEX_CODE, code, size); } /* Process a completely received message. */ static void line6_pod_process_message(struct usb_line6 *line6) { struct usb_line6_pod *pod = line6_to_pod(line6); const unsigned char *buf = pod->line6.buffer_message; if (memcmp(buf, pod_version_header, sizeof(pod_version_header)) == 0) { pod->firmware_version = buf[13] * 100 + buf[14] * 10 + buf[15]; pod->device_id = ((int)buf[8] << 16) | ((int)buf[9] << 8) | (int) buf[10]; if (pod->startup_progress == POD_STARTUP_VERSIONREQ) { pod->startup_progress = POD_STARTUP_SETUP; schedule_delayed_work(&line6->startup_work, 0); } return; } /* Only look for sysex messages from this device */ if (buf[0] != (LINE6_SYSEX_BEGIN | LINE6_CHANNEL_DEVICE) && buf[0] != (LINE6_SYSEX_BEGIN | LINE6_CHANNEL_UNKNOWN)) { return; } if (memcmp(buf + 1, line6_midi_id, sizeof(line6_midi_id)) != 0) return; if (buf[5] == POD_SYSEX_SYSTEM && buf[6] == POD_MONITOR_LEVEL) { short value = ((int)buf[7] << 12) | ((int)buf[8] << 8) | ((int)buf[9] << 4) | (int)buf[10]; pod->monitor_level = value; } } /* Send system parameter (from integer). */ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value, int code) { char *sysex; static const int size = 5; sysex = pod_alloc_sysex_buffer(pod, POD_SYSEX_SYSTEM, size); if (!sysex) return -ENOMEM; sysex[SYSEX_DATA_OFS] = code; sysex[SYSEX_DATA_OFS + 1] = (value >> 12) & 0x0f; sysex[SYSEX_DATA_OFS + 2] = (value >> 8) & 0x0f; sysex[SYSEX_DATA_OFS + 3] = (value >> 4) & 0x0f; sysex[SYSEX_DATA_OFS + 4] = (value) & 0x0f; line6_send_sysex_message(&pod->line6, sysex, size); kfree(sysex); return 0; } /* "read" request on "serial_number" special file. 
*/ static ssize_t serial_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = dev_to_snd_card(dev); struct usb_line6_pod *pod = card->private_data; return sysfs_emit(buf, "%u\n", pod->serial_number); } /* "read" request on "firmware_version" special file. */ static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = dev_to_snd_card(dev); struct usb_line6_pod *pod = card->private_data; return sysfs_emit(buf, "%d.%02d\n", pod->firmware_version / 100, pod->firmware_version % 100); } /* "read" request on "device_id" special file. */ static ssize_t device_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_card *card = dev_to_snd_card(dev); struct usb_line6_pod *pod = card->private_data; return sysfs_emit(buf, "%d\n", pod->device_id); } /* POD startup procedure. This is a sequence of functions with special requirements (e.g., must not run immediately after initialization, must not run in interrupt context). After the last one has finished, the device is ready to use. */ static void pod_startup(struct usb_line6 *line6) { struct usb_line6_pod *pod = line6_to_pod(line6); switch (pod->startup_progress) { case POD_STARTUP_VERSIONREQ: /* request firmware version: */ line6_version_request_async(line6); break; case POD_STARTUP_SETUP: /* serial number: */ line6_read_serial_number(&pod->line6, &pod->serial_number); /* ALSA audio interface: */ if (snd_card_register(line6->card)) dev_err(line6->ifcdev, "Failed to register POD card.\n"); pod->startup_progress = POD_STARTUP_DONE; break; default: break; } } /* POD special files: */ static DEVICE_ATTR_RO(device_id); static DEVICE_ATTR_RO(firmware_version); static DEVICE_ATTR_RO(serial_number); static struct attribute *pod_dev_attrs[] = { &dev_attr_device_id.attr, &dev_attr_firmware_version.attr, &dev_attr_serial_number.attr, NULL }; static const struct attribute_group pod_dev_attr_group = { .name = "pod", .attrs = pod_dev_attrs, }; /* control info callback */ static int snd_pod_control_monitor_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 65535; return 0; } /* control get callback */ static int snd_pod_control_monitor_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_line6_pcm *line6pcm = snd_kcontrol_chip(kcontrol); struct usb_line6_pod *pod = line6_to_pod(line6pcm->line6); ucontrol->value.integer.value[0] = pod->monitor_level; return 0; } /* control put callback */ static int snd_pod_control_monitor_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_line6_pcm *line6pcm = snd_kcontrol_chip(kcontrol); struct usb_line6_pod *pod = line6_to_pod(line6pcm->line6); if (ucontrol->value.integer.value[0] == pod->monitor_level) return 0; pod->monitor_level = ucontrol->value.integer.value[0]; pod_set_system_param_int(pod, ucontrol->value.integer.value[0], POD_MONITOR_LEVEL); return 1; } /* control definition */ static const struct snd_kcontrol_new pod_control_monitor = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Playback Volume", .index = 0, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .info = snd_pod_control_monitor_info, .get = snd_pod_control_monitor_get, .put = snd_pod_control_monitor_put }; /* Try to init POD device. 
*/ static int pod_init(struct usb_line6 *line6, const struct usb_device_id *id) { int err; struct usb_line6_pod *pod = line6_to_pod(line6); line6->process_message = line6_pod_process_message; line6->startup = pod_startup; /* create sysfs entries: */ err = snd_card_add_dev_attr(line6->card, &pod_dev_attr_group); if (err < 0) return err; /* initialize PCM subsystem: */ err = line6_init_pcm(line6, &pod_pcm_properties); if (err < 0) return err; /* register monitor control: */ err = snd_ctl_add(line6->card, snd_ctl_new1(&pod_control_monitor, line6->line6pcm)); if (err < 0) return err; /* When the sound card is registered at this point, the PODxt Live displays "Invalid Code Error 07", so we do it later in the event handler. */ if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL) { pod->monitor_level = POD_SYSTEM_INVALID; /* initiate startup procedure: */ schedule_delayed_work(&line6->startup_work, msecs_to_jiffies(POD_STARTUP_DELAY)); } return 0; } #define LINE6_DEVICE(prod) USB_DEVICE(0x0e41, prod) #define LINE6_IF_NUM(prod, n) USB_DEVICE_INTERFACE_NUMBER(0x0e41, prod, n) /* table of devices that work with this driver */ static const struct usb_device_id pod_id_table[] = { { LINE6_DEVICE(0x4250), .driver_info = LINE6_BASSPODXT }, { LINE6_DEVICE(0x4642), .driver_info = LINE6_BASSPODXTLIVE }, { LINE6_DEVICE(0x4252), .driver_info = LINE6_BASSPODXTPRO }, { LINE6_IF_NUM(0x5051, 1), .driver_info = LINE6_POCKETPOD }, { LINE6_DEVICE(0x5044), .driver_info = LINE6_PODXT }, { LINE6_IF_NUM(0x4650, 0), .driver_info = LINE6_PODXTLIVE_POD }, { LINE6_DEVICE(0x5050), .driver_info = LINE6_PODXTPRO }, {} }; MODULE_DEVICE_TABLE(usb, pod_id_table); static const struct line6_properties pod_properties_table[] = { [LINE6_BASSPODXT] = { .id = "BassPODxt", .name = "BassPODxt", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_BASSPODXTLIVE] = { .id = "BassPODxtLive", .name = "BassPODxt Live", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 1, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_BASSPODXTPRO] = { .id = "BassPODxtPro", .name = "BassPODxt Pro", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_POCKETPOD] = { .id = "PocketPOD", .name = "Pocket POD", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI, .altsetting = 0, .ep_ctrl_r = 0x82, .ep_ctrl_w = 0x02, /* no audio channel */ }, [LINE6_PODXT] = { .id = "PODxt", .name = "PODxt", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_PODXTLIVE_POD] = { .id = "PODxtLive", .name = "PODxt Live", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 1, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, [LINE6_PODXTPRO] = { .id = "PODxtPro", .name = "PODxt Pro", .capabilities = LINE6_CAP_CONTROL | LINE6_CAP_CONTROL_MIDI | LINE6_CAP_PCM | LINE6_CAP_HWMON, .altsetting = 5, .ep_ctrl_r = 0x84, .ep_ctrl_w = 0x03, .ep_audio_r = 0x82, .ep_audio_w = 0x01, }, }; /* Probe USB device. 
*/ static int pod_probe(struct usb_interface *interface, const struct usb_device_id *id) { return line6_probe(interface, id, "Line6-POD", &pod_properties_table[id->driver_info], pod_init, sizeof(struct usb_line6_pod)); } static struct usb_driver pod_driver = { .name = KBUILD_MODNAME, .probe = pod_probe, .disconnect = line6_disconnect, #ifdef CONFIG_PM .suspend = line6_suspend, .resume = line6_resume, .reset_resume = line6_resume, #endif .id_table = pod_id_table, }; module_usb_driver(pod_driver); MODULE_DESCRIPTION("Line 6 POD USB driver"); MODULE_LICENSE("GPL");
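pod_set_system_param_int() above packs a 16-bit parameter value into four 4-bit nibbles, most significant first, so that every sysex data byte stays within MIDI's 7-bit limit; line6_pod_process_message() performs the matching decode when a monitor-level message arrives. A standalone sketch of that round trip follows; the function names are illustrative, not driver code.

/*
 * Standalone sketch of the nibble packing used by pod_set_system_param_int()
 * and the matching decode in line6_pod_process_message() above: a 16-bit
 * value is split into four 4-bit nibbles (MSB first) so each sysex data
 * byte stays within MIDI's 7-bit limit. Illustrative only, not driver code.
 */
#include <assert.h>
#include <stdio.h>

static void encode_nibbles(int value, unsigned char out[4])
{
	out[0] = (value >> 12) & 0x0f;
	out[1] = (value >> 8) & 0x0f;
	out[2] = (value >> 4) & 0x0f;
	out[3] = value & 0x0f;
}

static int decode_nibbles(const unsigned char in[4])
{
	return ((int)in[0] << 12) | ((int)in[1] << 8) |
	       ((int)in[2] << 4) | (int)in[3];
}

int main(void)
{
	unsigned char nibbles[4];
	int level = 0x1234;	/* e.g. a monitor level value */

	encode_nibbles(level, nibbles);
	printf("encoded: %x %x %x %x\n",
	       nibbles[0], nibbles[1], nibbles[2], nibbles[3]);
	assert(decode_nibbles(nibbles) == level);
	return 0;
}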
// SPDX-License-Identifier: GPL-2.0-only /* * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6 * * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> #include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables mangle table"); #define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, }; static unsigned int ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct in6_addr saddr, daddr; unsigned int ret, verdict; u32 flowlabel, mark; u8 hop_limit; int err; /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); mark = skb->mark; hop_limit = ipv6_hdr(skb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either) */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ret = ip6t_do_table(priv, skb, state); verdict = ret & NF_VERDICT_MASK; if (verdict != NF_DROP && verdict != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } return ret; } /* The work comes in here from netfilter.c.
*/ static unsigned int ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(priv, skb, state); return ip6t_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; static int ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops); kfree(repl); return ret; } static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { ip6t_unregister_table_pre_exit(net, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "mangle"); } static struct pernet_operations ip6table_mangle_net_ops = { .pre_exit = ip6table_mangle_net_pre_exit, .exit = ip6table_mangle_net_exit, }; static int __init ip6table_mangle_init(void) { int ret = xt_register_template(&packet_mangler, ip6table_mangle_table_init); if (ret < 0) return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); if (IS_ERR(mangle_ops)) { xt_unregister_template(&packet_mangler); return PTR_ERR(mangle_ops); } ret = register_pernet_subsys(&ip6table_mangle_net_ops); if (ret < 0) { xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } return ret; } static void __exit ip6table_mangle_fini(void) { unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); kfree(mangle_ops); } module_init(ip6table_mangle_init); module_exit(ip6table_mangle_fini);
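ip6t_mangle_out() above snapshots version, priority (traffic class), and flow label with a single 32-bit load because all three share the first word of the IPv6 header (4 + 8 + 20 bits). Below is a small userspace sketch of that layout, with byte order made explicit via ntohl(); the header bytes are made up for illustration.

/*
 * Userspace sketch of why ip6t_mangle_out() can snapshot version, traffic
 * class and flow label with a single 32-bit load: they share the first
 * word of the IPv6 header (4 + 8 + 20 bits). Illustrative only; byte
 * order handling is made explicit with ntohl().
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* First 4 bytes of an example IPv6 header:
	 * version 6, traffic class 0x2e, flow label 0x12345 */
	unsigned char hdr[4] = { 0x62, 0xe1, 0x23, 0x45 };
	uint32_t word;

	memcpy(&word, hdr, sizeof(word));
	word = ntohl(word);

	printf("version:       %u\n", word >> 28);
	printf("traffic class: 0x%02x\n", (word >> 20) & 0xff);
	printf("flow label:    0x%05x\n", word & 0xfffff);
	return 0;
}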
// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/file.c - kernfs file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/fsnotify.h> #include <linux/uio.h> #include "kernfs-internal.h" struct kernfs_open_node { struct rcu_head rcu_head; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ unsigned int nr_mmapped; unsigned int nr_to_release; }; /* * kernfs_notify() may be called from any context and bounces notifications * through a work item. To minimize space overhead in kernfs_node, the * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next * pointer for %NULL.
*/ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) { int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); return &kernfs_locks->open_file_mutex[idx]; } static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) { struct mutex *lock; lock = kernfs_open_file_mutex_ptr(kn); mutex_lock(lock); return lock; } /** * of_on - Get the kernfs_open_node of the specified kernfs_open_file * @of: target kernfs_open_file * * Return: the kernfs_open_node of the kernfs_open_file */ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) { return rcu_dereference_protected(of->kn->attr.open, !list_empty(&of->list)); } /* Get active reference to kernfs node for an open file */ static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of) { /* Skip if file was already released */ if (unlikely(of->released)) return NULL; if (!kernfs_get_active(of->kn)) return NULL; return of; } static void kernfs_put_active_of(struct kernfs_open_file *of) { return kernfs_put_active(of->kn); } /** * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn * * @kn: target kernfs_node. * * Fetch and return ->attr.open of @kn when caller holds the * kernfs_open_file_mutex_ptr(kn). * * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when * the caller guarantees that this mutex is being held, other updaters can't * change ->attr.open and this means that we can safely deref ->attr.open * outside RCU read-side critical section. * * The caller needs to make sure that kernfs_open_file_mutex is held. * * Return: @kn->attr.open when kernfs_open_file_mutex is held. */ static struct kernfs_open_node * kernfs_deref_open_node_locked(struct kernfs_node *kn) { return rcu_dereference_protected(kn->attr.open, lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); } static struct kernfs_open_file *kernfs_of(struct file *file) { return ((struct seq_file *)file->private_data)->private; } /* * Determine the kernfs_ops for the given kernfs_node. This function must * be called while holding an active reference. */ static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) { if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); return kn->attr.ops; } /* * As kernfs_seq_stop() is also called after kernfs_seq_start() or * kernfs_seq_next() failure, it needs to distinguish whether it's stopping * a seq_file iteration which is fully initialized with an active reference * or an aborted kernfs_seq_start() due to get_active failure. The * position pointer is the only context for each seq_file iteration and * thus the stop condition should be encoded in it. As the return value is * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable * choice to indicate get_active failure. * * Unfortunately, this is complicated due to the optional custom seq_file * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or * custom seq_file operations and thus can't decide whether put_active * should be performed or not only on ERR_PTR(-ENODEV). 
* * This is worked around by factoring out the custom seq_stop() and * put_active part into kernfs_seq_stop_active(), skipping it from * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures * that kernfs_seq_stop_active() is skipped only after get_active failure. */ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_stop) ops->seq_stop(sf, v); kernfs_put_active_of(of); } static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active_of(of)) return ERR_PTR(-ENODEV); ops = kernfs_ops(of->kn); if (ops->seq_start) { void *next = ops->seq_start(sf, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } return single_start(sf, ppos); } static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_next) { void *next = ops->seq_next(sf, v, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } else { /* * The same behavior and code as single_open(), always * terminate after the initial read. */ ++*ppos; return NULL; } } static void kernfs_seq_stop(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; if (v != ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, v); mutex_unlock(&of->mutex); } static int kernfs_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; of->event = atomic_read(&of_on(of)->event); return of->kn->attr.ops->seq_show(sf, v); } static const struct seq_operations kernfs_seq_ops = { .start = kernfs_seq_start, .next = kernfs_seq_next, .stop = kernfs_seq_stop, .show = kernfs_seq_show, }; /* * As reading a bin file can have side-effects, the exact offset and bytes * specified in read(2) call should be passed to the read callback making * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. */ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; char *buf; buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; /* * @of->mutex nests outside active ref and is used to ensure that * the ops aren't called concurrently for the same open file.
*/ mutex_lock(&of->mutex); if (!kernfs_get_active_of(of)) { len = -ENODEV; mutex_unlock(&of->mutex); goto out_free; } of->event = atomic_read(&of_on(of)->event); ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active_of(of); mutex_unlock(&of->mutex); if (len < 0) goto out_free; if (copy_to_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) { if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) return seq_read_iter(iocb, iter); return kernfs_file_read_iter(iocb, iter); } /* * Copy data in from userland and pass it to the matching kernfs write * operation. * * There is no easy way for us to know if userspace is only doing partial * writes, so we don't support them. We expect the entire buffer to come on * the first write. Hint: if you're writing a value, first read the file, * modify only the value you're changing, then write the entire buffer * back. */ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = iov_iter_count(iter); const struct kernfs_ops *ops; char *buf; if (of->atomic_write_len) { if (len > of->atomic_write_len) return -E2BIG; } else { len = min_t(size_t, len, PAGE_SIZE); } buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; if (copy_from_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } buf[len] = '\0'; /* guarantee string termination */ /* * @of->mutex nests outside active ref and is used to ensure that * the ops aren't called concurrently for the same open file.
*/ mutex_lock(&of->mutex); if (!kernfs_get_active_of(of)) { mutex_unlock(&of->mutex); len = -ENODEV; goto out_free; } ops = kernfs_ops(of->kn); if (ops->write) len = ops->write(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active_of(of); mutex_unlock(&of->mutex); if (len > 0) iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static void kernfs_vma_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); if (!of->vm_ops) return; if (!kernfs_get_active_of(of)) return; if (of->vm_ops->open) of->vm_ops->open(vma); kernfs_put_active_of(of); } static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) return VM_FAULT_SIGBUS; if (!kernfs_get_active_of(of)) return VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) ret = of->vm_ops->fault(vmf); kernfs_put_active_of(of); return ret; } static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) return VM_FAULT_SIGBUS; if (!kernfs_get_active_of(of)) return VM_FAULT_SIGBUS; ret = 0; if (of->vm_ops->page_mkwrite) ret = of->vm_ops->page_mkwrite(vmf); else file_update_time(file); kernfs_put_active_of(of); return ret; } static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; if (!of->vm_ops) return -EINVAL; if (!kernfs_get_active_of(of)) return -EINVAL; ret = -EINVAL; if (of->vm_ops->access) ret = of->vm_ops->access(vma, addr, buf, len, write); kernfs_put_active_of(of); return ret; } static const struct vm_operations_struct kernfs_vm_ops = { .open = kernfs_vma_open, .fault = kernfs_vma_fault, .page_mkwrite = kernfs_vma_page_mkwrite, .access = kernfs_vma_access, }; static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; int rc; /* * mmap path and of->mutex are prone to triggering spurious lockdep * warnings and we don't want to add spurious locking dependency * between the two. Check whether mmap is actually implemented * without grabbing @of->mutex by testing HAS_MMAP flag. See the * comment in kernfs_fop_open() for more details. */ if (!(of->kn->flags & KERNFS_HAS_MMAP)) return -ENODEV; mutex_lock(&of->mutex); rc = -ENODEV; if (!kernfs_get_active_of(of)) goto out_unlock; ops = kernfs_ops(of->kn); rc = ops->mmap(of, vma); if (rc) goto out_put; /* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() * to satisfy versions of X which crash if the mmap fails: that * substitutes a new vm_file, and we don't then want bin_vm_ops. */ if (vma->vm_file != file) goto out_put; rc = -EINVAL; if (of->mmapped && of->vm_ops != vma->vm_ops) goto out_put; /* * It is not possible to successfully wrap close. * So error if someone is trying to use close. 
*/ if (vma->vm_ops && vma->vm_ops->close) goto out_put; rc = 0; if (!of->mmapped) { of->mmapped = true; of_on(of)->nr_mmapped++; of->vm_ops = vma->vm_ops; } vma->vm_ops = &kernfs_vm_ops; out_put: kernfs_put_active_of(of); out_unlock: mutex_unlock(&of->mutex); return rc; } /** * kernfs_get_open_node - get or create kernfs_open_node * @kn: target kernfs_node * @of: kernfs_open_file for this instance of open * * If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. * * Locking: * Kernel thread context (may sleep). * * Return: * %0 on success, -errno on failure. */ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { /* not there, initialize a new one */ on = kzalloc(sizeof(*on), GFP_KERNEL); if (!on) { mutex_unlock(mutex); return -ENOMEM; } atomic_set(&on->event, 1); init_waitqueue_head(&on->poll); INIT_LIST_HEAD(&on->files); rcu_assign_pointer(kn->attr.open, on); } list_add_tail(&of->list, &on->files); if (kn->flags & KERNFS_HAS_RELEASE) on->nr_to_release++; mutex_unlock(mutex); return 0; } /** * kernfs_unlink_open_file - Unlink @of from @kn. * * @kn: target kernfs_node * @of: associated kernfs_open_file * @open_failed: ->open() failed, cancel ->release() * * Unlink @of from list of @kn's associated open files. If list of * associated open files becomes empty, disassociate and free * kernfs_open_node. * * LOCKING: * None. */ static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of, bool open_failed) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } if (of) { if (kn->flags & KERNFS_HAS_RELEASE) { WARN_ON_ONCE(of->released == open_failed); if (open_failed) on->nr_to_release--; } if (of->mmapped) on->nr_mmapped--; list_del(&of->list); } if (list_empty(&on->files)) { rcu_assign_pointer(kn->attr.open, NULL); kfree_rcu(on, rcu_head); } mutex_unlock(mutex); } static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = inode->i_private; struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; int error = -EACCES; if (!kernfs_get_active(kn)) return -ENODEV; ops = kernfs_ops(kn); has_read = ops->seq_show || ops->read || ops->mmap; has_write = ops->write || ops->mmap; has_mmap = ops->mmap; /* see the flag definition for details */ if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { if ((file->f_mode & FMODE_WRITE) && (!(inode->i_mode & S_IWUGO) || !has_write)) goto err_out; if ((file->f_mode & FMODE_READ) && (!(inode->i_mode & S_IRUGO) || !has_read)) goto err_out; } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); if (!of) goto err_out; /* * The following is done to give a different lockdep key to * @of->mutex for files which implement mmap. This is a rather * crude way to avoid false positive lockdep warning around * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under * which mm->mmap_lock nests, while holding @of->mutex. As each * open file has a separate mutex, it's okay as long as those don't * happen on the same file. 
At this point, we can't easily give * each file a separate locking class. Let's differentiate on * whether the file has mmap or not for now. * * For similar reasons, writable and readonly files are given different * lockdep keys, because the writable file /sys/power/resume may call vfs * lookup helpers for arbitrary paths and readonly files can be read by * overlayfs from vfs helpers when sysfs is a lower layer of overlayfs. * * All three cases look the same. They're supposed to * look that way and give @of->mutex different static lockdep keys. */ if (has_mmap) mutex_init(&of->mutex); else if (file->f_mode & FMODE_WRITE) mutex_init(&of->mutex); else mutex_init(&of->mutex); of->kn = kn; of->file = file; /* * Write path needs to access atomic_write_len outside active reference. * Cache it in open_file. See kernfs_fop_write_iter() for details. */ of->atomic_write_len = ops->atomic_write_len; error = -EINVAL; /* * ->seq_show is incompatible with ->prealloc, * as seq_read does its own allocation. * ->read must be used instead. */ if (ops->prealloc && ops->seq_show) goto err_free; if (ops->prealloc) { int len = of->atomic_write_len ?: PAGE_SIZE; of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); error = -ENOMEM; if (!of->prealloc_buf) goto err_free; mutex_init(&of->prealloc_mutex); } /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. */ if (ops->seq_show) error = seq_open(file, &kernfs_seq_ops); else error = seq_open(file, NULL); if (error) goto err_free; of->seq_file = file->private_data; of->seq_file->private = of; /* seq_file clears PWRITE unconditionally, restore it if WRITE */ if (file->f_mode & FMODE_WRITE) file->f_mode |= FMODE_PWRITE; /* make sure we have open node struct */ error = kernfs_get_open_node(kn, of); if (error) goto err_seq_release; if (ops->open) { /* nobody has access to @of yet, skip @of->mutex */ error = ops->open(of); if (error) goto err_put_node; } /* open succeeded, put active references */ kernfs_put_active(kn); return 0; err_put_node: kernfs_unlink_open_file(kn, of, true); err_seq_release: seq_release(inode, file); err_free: kfree(of->prealloc_buf); kfree(of); err_out: kernfs_put_active(kn); return error; } /* used from release/drain to ensure that ->release() is called exactly once */ static void kernfs_release_file(struct kernfs_node *kn, struct kernfs_open_file *of) { /* * @of is guaranteed to have no other file operations in flight and * we just want to synchronize release and drain paths. * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used * here because drain path may be called from places which can * cause circular dependency. */ lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); if (!of->released) { /* * A file is never detached without being released and we * need to be able to release files which are deactivated * and being drained. Don't use kernfs_ops().
*/ kn->attr.ops->release(of); of->released = true; of_on(of)->nr_to_release--; } } static int kernfs_fop_release(struct inode *inode, struct file *filp) { struct kernfs_node *kn = inode->i_private; struct kernfs_open_file *of = kernfs_of(filp); if (kn->flags & KERNFS_HAS_RELEASE) { struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); kernfs_release_file(kn, of); mutex_unlock(mutex); } kernfs_unlink_open_file(kn, of, false); seq_release(inode, filp); kfree(of->prealloc_buf); kfree(of); return 0; } bool kernfs_should_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; bool ret; /* * @kn being deactivated guarantees that @kn->attr.open can't change * beneath us making the lockless test below safe. * Callers post kernfs_unbreak_active_protection may be counted in * kn->active by now, do not WARN_ON because of them. */ rcu_read_lock(); on = rcu_dereference(kn->attr.open); ret = on && (on->nr_mmapped || on->nr_to_release); rcu_read_unlock(); return ret; } void kernfs_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; struct kernfs_open_file *of; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); if (of->mmapped) { unmap_mapping_range(inode->i_mapping, 0, 0, 1); of->mmapped = false; on->nr_mmapped--; } if (kn->flags & KERNFS_HAS_RELEASE) kernfs_release_file(kn, of); } WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release); mutex_unlock(mutex); } /* * Kernfs attribute files are pollable. The idea is that you read * the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the * manager for the kobject supports notification), poll will * return EPOLLERR|EPOLLPRI, and select will return the fd whether * it is waiting for read, write, or exceptions. * Once poll/select indicates that the value has changed, you * need to close and re-open the file, or seek to 0 and read again. * Reminder: this only works for attributes which actively support * it, and it is not possible to test an attribute from userspace * to see if it supports poll (Neither 'poll' nor 'select' return * an appropriate error code). When in doubt, set a suitable timeout value. */ __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) { struct kernfs_open_node *on = of_on(of); poll_wait(of->file, &on->poll, wait); if (of->event != atomic_read(&on->event)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; return DEFAULT_POLLMASK; } static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) { struct kernfs_open_file *of = kernfs_of(filp); struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); __poll_t ret; if (!kernfs_get_active_of(of)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; if (kn->attr.ops->poll) ret = kn->attr.ops->poll(of, wait); else ret = kernfs_generic_poll(of, wait); kernfs_put_active_of(of); return ret; } static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; loff_t ret; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. 
*/ mutex_lock(&of->mutex); if (!kernfs_get_active_of(of)) { mutex_unlock(&of->mutex); return -ENODEV; } ops = kernfs_ops(of->kn); if (ops->llseek) ret = ops->llseek(of, offset, whence); else ret = generic_file_llseek(file, offset, whence); kernfs_put_active_of(of); mutex_unlock(&of->mutex); return ret; } static void kernfs_notify_workfn(struct work_struct *work) { struct kernfs_node *kn; struct kernfs_super_info *info; struct kernfs_root *root; repeat: /* pop one off the notify_list */ spin_lock_irq(&kernfs_notify_lock); kn = kernfs_notify_list; if (kn == KERNFS_NOTIFY_EOL) { spin_unlock_irq(&kernfs_notify_lock); return; } kernfs_notify_list = kn->attr.notify_next; kn->attr.notify_next = NULL; spin_unlock_irq(&kernfs_notify_lock); root = kernfs_root(kn); /* kick fsnotify */ down_read(&root->kernfs_supers_rwsem); down_read(&root->kernfs_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; const char *kn_name; struct inode *inode; struct qstr name; /* * We want fsnotify_modify() on @kn but as the * modifications aren't originating from userland we don't * have the matching @file available. Look up the inodes * and generate the events manually. */ inode = ilookup(info->sb, kernfs_ino(kn)); if (!inode) continue; kn_name = kernfs_rcu_name(kn); name = QSTR(kn_name); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); if (p_inode) { fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, inode, FSNOTIFY_EVENT_INODE, p_inode, &name, inode, 0); iput(p_inode); } kernfs_put(parent); } if (!p_inode) fsnotify_inode(inode, FS_MODIFY); iput(inode); } up_read(&root->kernfs_rwsem); up_read(&root->kernfs_supers_rwsem); kernfs_put(kn); goto repeat; } /** * kernfs_notify - notify a kernfs file * @kn: file to notify * * Notify @kn such that poll(2) on @kn wakes up. May be called from any * context. */ void kernfs_notify(struct kernfs_node *kn) { static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); unsigned long flags; struct kernfs_open_node *on; if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) return; /* kick poll immediately */ rcu_read_lock(); on = rcu_dereference(kn->attr.open); if (on) { atomic_inc(&on->event); wake_up_interruptible(&on->poll); } rcu_read_unlock(); /* schedule work to kick fsnotify */ spin_lock_irqsave(&kernfs_notify_lock, flags); if (!kn->attr.notify_next) { kernfs_get(kn); kn->attr.notify_next = kernfs_notify_list; kernfs_notify_list = kn; schedule_work(&kernfs_notify_work); } spin_unlock_irqrestore(&kernfs_notify_lock, flags); } EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { .read_iter = kernfs_fop_read_iter, .write_iter = kernfs_fop_write_iter, .llseek = kernfs_fop_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /** * __kernfs_create_file - kernfs internal function to create a file * @parent: directory to create the file in * @name: name of the file * @mode: mode of the file * @uid: uid of the file * @gid: gid of the file * @size: size of the file * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Return: the created node on success, ERR_PTR() value on error.
*/ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { struct kernfs_node *kn; unsigned flags; int rc; flags = KERNFS_FILE; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, uid, gid, flags); if (!kn) return ERR_PTR(-ENOMEM); kn->attr.ops = ops; kn->attr.size = size; kn->ns = ns; kn->priv = priv; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (key) { lockdep_init_map(&kn->dep_map, "kn->active", key, 0); kn->flags |= KERNFS_LOCKDEP; } #endif /* * kn->attr.ops is accessible only while holding active ref. We * need to know whether some ops are implemented outside active * ref. Cache their existence in flags. */ if (ops->seq_show) kn->flags |= KERNFS_HAS_SEQ_SHOW; if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; if (ops->release) kn->flags |= KERNFS_HAS_RELEASE; rc = kernfs_add_one(kn); if (rc) { kernfs_put(kn); return ERR_PTR(rc); } return kn; }
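/*
 * Editor's illustrative sketch (not part of fs/kernfs/file.c): the
 * userspace side of the poll protocol documented above
 * kernfs_generic_poll(). The attribute is read once, poll(2) then waits
 * for EPOLLERR|EPOLLPRI, and after each wakeup the file is re-read from
 * offset 0, as the comment recommends. The path, buffer size and helper
 * name are assumptions made for this example only.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

static int watch_kernfs_attr(const char *path)
{
	char buf[256];
	struct pollfd pfd;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;

	pfd.fd = fd;
	pfd.events = POLLPRI;

	for (;;) {
		/* re-read from offset 0 after every notification */
		ssize_t n = pread(fd, buf, sizeof(buf) - 1, 0);

		if (n < 0)
			break;
		buf[n] = '\0';
		printf("%s: %s", path, buf);

		/* kernfs_notify() makes this return POLLERR|POLLPRI */
		if (poll(&pfd, 1, -1) < 0)
			break;
	}

	close(fd);
	return 0;
}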
// SPDX-License-Identifier: GPL-2.0 /* * High memory handling common code and variables. * * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de * * * Redesigned the x86 32-bit VM architecture to deal with * 64-bit physical space.
With current x86 CPUs this * means up to 64 Gigabytes physical RAM. * * Rewrote high memory support to move the page cache into * high memory. Implemented permanent (schedulable) kmaps * based on Linus' idea. * * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> */ #include <linux/mm.h> #include <linux/export.h> #include <linux/swap.h> #include <linux/bio.h> #include <linux/pagemap.h> #include <linux/mempool.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/highmem.h> #include <linux/kgdb.h> #include <asm/tlbflush.h> #include <linux/vmalloc.h> #ifdef CONFIG_KMAP_LOCAL static inline int kmap_local_calc_idx(int idx) { return idx + KM_MAX_IDX * smp_processor_id(); } #ifndef arch_kmap_local_map_idx #define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx) #endif #endif /* CONFIG_KMAP_LOCAL */ /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped * since a TLB flush - it is usable. * 1 means that there are no users, but it has been mapped * since the last TLB flush - so we can't use it. * n means that there are (n-1) current users of it. */ #ifdef CONFIG_HIGHMEM /* * Architecture with aliasing data cache may define the following family of * helper functions in its asm/highmem.h to control cache color of virtual * addresses where physical memory pages are mapped by kmap. */ #ifndef get_pkmap_color /* * Determine color of virtual address where the page should be mapped. */ static inline unsigned int get_pkmap_color(const struct page *page) { return 0; } #define get_pkmap_color get_pkmap_color /* * Get next index for mapping inside PKMAP region for page with given color. */ static inline unsigned int get_next_pkmap_nr(unsigned int color) { static unsigned int last_pkmap_nr; last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; return last_pkmap_nr; } /* * Determine if page index inside PKMAP region (pkmap_nr) of given color * has wrapped around PKMAP region end. When this happens an attempt to * flush all unused PKMAP slots is made. */ static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color) { return pkmap_nr == 0; } /* * Get the number of PKMAP entries of the given color. If no free slot is * found after checking that many entries, kmap will sleep waiting for * someone to call kunmap and free PKMAP slot. */ static inline int get_pkmap_entries_count(unsigned int color) { return LAST_PKMAP; } /* * Get head of a wait queue for PKMAP entries of the given color. * Wait queues for different mapping colors should be independent to avoid * unnecessary wakeups caused by freeing of slots of other colors. */ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) { static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); return &pkmap_map_wait; } #endif unsigned long __nr_free_highpages(void) { unsigned long pages = 0; struct zone *zone; for_each_populated_zone(zone) { if (is_highmem(zone)) pages += zone_page_state(zone, NR_FREE_PAGES); } return pages; } unsigned long __totalhigh_pages(void) { unsigned long pages = 0; struct zone *zone; for_each_populated_zone(zone) { if (is_highmem(zone)) pages += zone_managed_pages(zone); } return pages; } EXPORT_SYMBOL(__totalhigh_pages); static int pkmap_count[LAST_PKMAP]; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); pte_t *pkmap_page_table; /* * Most architectures have no use for kmap_high_get(), so let's abstract * the disabling of IRQ out of the locking in that case to save on a * potential useless overhead. 
*/ #ifdef ARCH_NEEDS_KMAP_HIGH_GET #define lock_kmap() spin_lock_irq(&kmap_lock) #define unlock_kmap() spin_unlock_irq(&kmap_lock) #define lock_kmap_any(flags) spin_lock_irqsave(&kmap_lock, flags) #define unlock_kmap_any(flags) spin_unlock_irqrestore(&kmap_lock, flags) #else #define lock_kmap() spin_lock(&kmap_lock) #define unlock_kmap() spin_unlock(&kmap_lock) #define lock_kmap_any(flags) \ do { spin_lock(&kmap_lock); (void)(flags); } while (0) #define unlock_kmap_any(flags) \ do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif struct page *__kmap_to_page(void *vaddr) { unsigned long base = (unsigned long) vaddr & PAGE_MASK; struct kmap_ctrl *kctrl = &current->kmap_ctrl; unsigned long addr = (unsigned long)vaddr; int i; /* kmap() mappings */ if (WARN_ON_ONCE(addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP))) return pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(addr)])); /* kmap_local_page() mappings */ if (WARN_ON_ONCE(base >= __fix_to_virt(FIX_KMAP_END) && base < __fix_to_virt(FIX_KMAP_BEGIN))) { for (i = 0; i < kctrl->idx; i++) { unsigned long base_addr; int idx; idx = arch_kmap_local_map_idx(i, pte_pfn(kctrl->pteval[i])); base_addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); if (base_addr == base) return pte_page(kctrl->pteval[i]); } } return virt_to_page(vaddr); } EXPORT_SYMBOL(__kmap_to_page); static void flush_all_zero_pkmaps(void) { int i; int need_flush = 0; flush_cache_kmaps(); for (i = 0; i < LAST_PKMAP; i++) { struct page *page; pte_t ptent; /* * zero means we don't have anything to do, * >1 means that it is still in use. Only * a count of 1 means that it is free but * needs to be unmapped */ if (pkmap_count[i] != 1) continue; pkmap_count[i] = 0; /* sanity check */ ptent = ptep_get(&pkmap_page_table[i]); BUG_ON(pte_none(ptent)); /* * Don't need an atomic fetch-and-clear op here; * no-one has the page mapped, and cannot get at * its virtual address (and hence PTE) without first * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution.
*/ page = pte_page(ptent); pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]); set_page_address(page, NULL); need_flush = 1; } if (need_flush) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } void __kmap_flush_unused(void) { lock_kmap(); flush_all_zero_pkmaps(); unlock_kmap(); } static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; int count; unsigned int last_pkmap_nr; unsigned int color = get_pkmap_color(page); start: count = get_pkmap_entries_count(color); /* Find an empty entry */ for (;;) { last_pkmap_nr = get_next_pkmap_nr(color); if (no_more_pkmaps(last_pkmap_nr, color)) { flush_all_zero_pkmaps(); count = get_pkmap_entries_count(color); } if (!pkmap_count[last_pkmap_nr]) break; /* Found a usable entry */ if (--count) continue; /* * Sleep for somebody else to unmap their entries */ { DECLARE_WAITQUEUE(wait, current); wait_queue_head_t *pkmap_map_wait = get_pkmap_wait_queue_head(color); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(pkmap_map_wait, &wait); unlock_kmap(); schedule(); remove_wait_queue(pkmap_map_wait, &wait); lock_kmap(); /* Somebody else might have mapped it while we slept */ if (page_address(page)) return (unsigned long)page_address(page); /* Re-start */ goto start; } } vaddr = PKMAP_ADDR(last_pkmap_nr); set_pte_at(&init_mm, vaddr, &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); return vaddr; } /** * kmap_high - map a highmem page into memory * @page: &struct page to map * * Returns the page's virtual memory address. * * We cannot call this from interrupts, as it may block. */ void *kmap_high(struct page *page) { unsigned long vaddr; /* * For highmem pages, we can't trust "virtual" until * after we have the lock. */ lock_kmap(); vaddr = (unsigned long)page_address(page); if (!vaddr) vaddr = map_new_virtual(page); pkmap_count[PKMAP_NR(vaddr)]++; BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); unlock_kmap(); return (void *) vaddr; } EXPORT_SYMBOL(kmap_high); #ifdef ARCH_NEEDS_KMAP_HIGH_GET /** * kmap_high_get - pin a highmem page into memory * @page: &struct page to pin * * Returns the page's current virtual memory address, or NULL if no mapping * exists. If and only if a non null address is returned then a * matching call to kunmap_high() is necessary. * * This can be called from any context. */ void *kmap_high_get(const struct page *page) { unsigned long vaddr, flags; lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); if (vaddr) { BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1); pkmap_count[PKMAP_NR(vaddr)]++; } unlock_kmap_any(flags); return (void *) vaddr; } #endif /** * kunmap_high - unmap a highmem page * @page: &struct page to unmap * * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called * only from user context. */ void kunmap_high(const struct page *page) { unsigned long vaddr; unsigned long nr; unsigned long flags; int need_wakeup; unsigned int color = get_pkmap_color(page); wait_queue_head_t *pkmap_map_wait; lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); BUG_ON(!vaddr); nr = PKMAP_NR(vaddr); /* * A count must never go down to zero * without a TLB flush! */ need_wakeup = 0; switch (--pkmap_count[nr]) { case 0: BUG(); case 1: /* * Avoid an unnecessary wake_up() function call. * The common case is pkmap_count[] == 1, but * no waiters. * The tasks queued in the wait-queue are guarded * by both the lock in the wait-queue-head and by * the kmap_lock.
As the kmap_lock is held here, * no need for the wait-queue-head's lock. Simply * test if the queue is empty. */ pkmap_map_wait = get_pkmap_wait_queue_head(color); need_wakeup = waitqueue_active(pkmap_map_wait); } unlock_kmap_any(flags); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) wake_up(pkmap_map_wait); } EXPORT_SYMBOL(kunmap_high); void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (start1 >= end1) start1 = end1 = 0; if (start2 >= end2) start2 = end2 = 0; for (i = 0; i < compound_nr(page); i++) { void *kaddr = NULL; if (start1 >= PAGE_SIZE) { start1 -= PAGE_SIZE; end1 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); if (end1 > start1) { kaddr = kmap_local_page(page + i); memset(kaddr + start1, 0, this_end - start1); } end1 -= this_end; start1 = 0; } if (start2 >= PAGE_SIZE) { start2 -= PAGE_SIZE; end2 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); if (end2 > start2) { if (!kaddr) kaddr = kmap_local_page(page + i); memset(kaddr + start2, 0, this_end - start2); } end2 -= this_end; start2 = 0; } if (kaddr) { kunmap_local(kaddr); flush_dcache_page(page + i); } if (!end1 && !end2) break; } BUG_ON((start1 | start2 | end1 | end2) != 0); } EXPORT_SYMBOL(zero_user_segments); #endif /* CONFIG_HIGHMEM */ #ifdef CONFIG_KMAP_LOCAL #include <asm/kmap_size.h> /* * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second * slot is unused which acts as a guard page */ #ifdef CONFIG_DEBUG_KMAP_LOCAL # define KM_INCR 2 #else # define KM_INCR 1 #endif static inline int kmap_local_idx_push(void) { WARN_ON_ONCE(in_hardirq() && !irqs_disabled()); current->kmap_ctrl.idx += KM_INCR; BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX); return current->kmap_ctrl.idx - 1; } static inline int kmap_local_idx(void) { return current->kmap_ctrl.idx - 1; } static inline void kmap_local_idx_pop(void) { current->kmap_ctrl.idx -= KM_INCR; BUG_ON(current->kmap_ctrl.idx < 0); } #ifndef arch_kmap_local_post_map # define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) #endif #ifndef arch_kmap_local_pre_unmap # define arch_kmap_local_pre_unmap(vaddr) do { } while (0) #endif #ifndef arch_kmap_local_post_unmap # define arch_kmap_local_post_unmap(vaddr) do { } while (0) #endif #ifndef arch_kmap_local_unmap_idx #define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx) #endif #ifndef arch_kmap_local_high_get static inline void *arch_kmap_local_high_get(const struct page *page) { return NULL; } #endif #ifndef arch_kmap_local_set_pte #define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \ set_pte_at(mm, vaddr, ptep, ptev) #endif /* Unmap a local mapping which was obtained by kmap_high_get() */ static inline bool kmap_high_unmap_local(unsigned long vaddr) { #ifdef ARCH_NEEDS_KMAP_HIGH_GET if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { kunmap_high(pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(vaddr)]))); return true; } #endif return false; } static pte_t *__kmap_pte; static pte_t *kmap_get_pte(unsigned long vaddr, int idx) { if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY)) /* * Set by the arch if __kmap_pte[-idx] does not produce * the correct entry. 
*/ return virt_to_kpte(vaddr); if (!__kmap_pte) __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); return &__kmap_pte[-idx]; } void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) { pte_t pteval, *kmap_pte; unsigned long vaddr; int idx; /* * Disable migration so resulting virtual address is stable * across preemption. */ migrate_disable(); preempt_disable(); idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(vaddr, idx); BUG_ON(!pte_none(ptep_get(kmap_pte))); pteval = pfn_pte(pfn, prot); arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval); arch_kmap_local_post_map(vaddr, pteval); current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; preempt_enable(); return (void *)vaddr; } EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot); void *__kmap_local_page_prot(const struct page *page, pgprot_t prot) { void *kmap; /* * To broaden the usage of the actual kmap_local() machinery always map * pages when debugging is enabled and the architecture has no problems * with alias mappings. */ if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page)) return page_address(page); /* Try kmap_high_get() if architecture has it enabled */ kmap = arch_kmap_local_high_get(page); if (kmap) return kmap; return __kmap_local_pfn_prot(page_to_pfn(page), prot); } EXPORT_SYMBOL(__kmap_local_page_prot); void kunmap_local_indexed(const void *vaddr) { unsigned long addr = (unsigned long) vaddr & PAGE_MASK; pte_t *kmap_pte; int idx; if (addr < __fix_to_virt(FIX_KMAP_END) || addr > __fix_to_virt(FIX_KMAP_BEGIN)) { if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) { /* This _should_ never happen! See above. */ WARN_ON_ONCE(1); return; } /* * Handle mappings which were obtained by kmap_high_get() * first as the virtual address of such mappings is below * PAGE_OFFSET. Warn for all other addresses which are in * the user space part of the virtual address space. */ if (!kmap_high_unmap_local(addr)) WARN_ON_ONCE(addr < PAGE_OFFSET); return; } preempt_disable(); idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr); WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); kmap_pte = kmap_get_pte(addr, idx); arch_kmap_local_pre_unmap(addr); pte_clear(&init_mm, addr, kmap_pte); arch_kmap_local_post_unmap(addr); current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0); kmap_local_idx_pop(); preempt_enable(); migrate_enable(); } EXPORT_SYMBOL(kunmap_local_indexed); /* * Invoked before switch_to(). This is safe even when during or after * clearing the maps an interrupt which needs a kmap_local happens because * the task::kmap_ctrl.idx is not modified by the unmapping code so a * nested kmap_local will use the next unused index and restore the index * on unmap. The already cleared kmaps of the outgoing task are irrelevant * because the interrupt context does not know about them. The same applies * when scheduling back in for an interrupt which happens before the * restore is complete. */ void __kmap_local_sched_out(void) { struct task_struct *tsk = current; pte_t *kmap_pte; int i; /* Clear kmaps */ for (i = 0; i < tsk->kmap_ctrl.idx; i++) { pte_t pteval = tsk->kmap_ctrl.pteval[i]; unsigned long addr; int idx; /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) continue; /* * This is a horrible hack for XTENSA to calculate the * coloured PTE index. 
Uses the PFN encoded into the pteval * and the map index calculation because the actual mapped * virtual address is not stored in task::kmap_ctrl. * For any sane architecture this is optimized out. */ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(addr, idx); arch_kmap_local_pre_unmap(addr); pte_clear(&init_mm, addr, kmap_pte); arch_kmap_local_post_unmap(addr); } } void __kmap_local_sched_in(void) { struct task_struct *tsk = current; pte_t *kmap_pte; int i; /* Restore kmaps */ for (i = 0; i < tsk->kmap_ctrl.idx; i++) { pte_t pteval = tsk->kmap_ctrl.pteval[i]; unsigned long addr; int idx; /* With debug all even slots are unmapped and act as guard */ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(pte_val(pteval) != 0); continue; } if (WARN_ON_ONCE(pte_none(pteval))) continue; /* See comment in __kmap_local_sched_out() */ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); kmap_pte = kmap_get_pte(addr, idx); set_pte_at(&init_mm, addr, kmap_pte, pteval); arch_kmap_local_post_map(addr, pteval); } } void kmap_local_fork(struct task_struct *tsk) { if (WARN_ON_ONCE(tsk->kmap_ctrl.idx)) memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl)); } #endif #if defined(HASHED_PAGE_VIRTUAL) #define PA_HASH_ORDER 7 /* * Describes one page->virtual association */ struct page_address_map { struct page *page; void *virtual; struct list_head list; }; static struct page_address_map page_address_maps[LAST_PKMAP]; /* * Hash table bucket */ static struct page_address_slot { struct list_head lh; /* List of page_address_maps */ spinlock_t lock; /* Protect this bucket's list */ } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER]; static struct page_address_slot *page_slot(const struct page *page) { return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)]; } /** * page_address - get the mapped virtual address of a page * @page: &struct page to get the virtual address of * * Returns the page's virtual address. 
*/ void *page_address(const struct page *page) { unsigned long flags; void *ret; struct page_address_slot *pas; if (!PageHighMem(page)) return lowmem_page_address(page); pas = page_slot(page); ret = NULL; spin_lock_irqsave(&pas->lock, flags); if (!list_empty(&pas->lh)) { struct page_address_map *pam; list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { ret = pam->virtual; break; } } } spin_unlock_irqrestore(&pas->lock, flags); return ret; } EXPORT_SYMBOL(page_address); /** * set_page_address - set a page's virtual address * @page: &struct page to set * @virtual: virtual address to use */ void set_page_address(struct page *page, void *virtual) { unsigned long flags; struct page_address_slot *pas; struct page_address_map *pam; BUG_ON(!PageHighMem(page)); pas = page_slot(page); if (virtual) { /* Add */ pam = &page_address_maps[PKMAP_NR((unsigned long)virtual)]; pam->page = page; pam->virtual = virtual; spin_lock_irqsave(&pas->lock, flags); list_add_tail(&pam->list, &pas->lh); spin_unlock_irqrestore(&pas->lock, flags); } else { /* Remove */ spin_lock_irqsave(&pas->lock, flags); list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { list_del(&pam->list); break; } } spin_unlock_irqrestore(&pas->lock, flags); } } void __init page_address_init(void) { int i; for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) { INIT_LIST_HEAD(&page_address_htable[i].lh); spin_lock_init(&page_address_htable[i].lock); } } #endif /* defined(HASHED_PAGE_VIRTUAL) */
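/*
 * Editor's illustrative sketch (not part of mm/highmem.c): kmap_local
 * mappings are strictly stack-like, since kmap_local_idx_push() and
 * kmap_local_idx_pop() above manage a per-task index, so kunmap_local()
 * must be called in the reverse (LIFO) order of the kmap_local_page()
 * calls. A minimal in-kernel usage pattern for two possibly-highmem
 * pages follows; the helper name is made up for this example, and the
 * kernel already provides copy_highpage() for exactly this job.
 */
#include <linux/highmem.h>
#include <linux/string.h>

static void example_copy_page(struct page *dst, struct page *src)
{
	void *d = kmap_local_page(dst);
	void *s = kmap_local_page(src);

	memcpy(d, s, PAGE_SIZE);

	/* unmap in reverse (LIFO) order of mapping */
	kunmap_local(s);
	kunmap_local(d);
}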
// SPDX-License-Identifier: GPL-2.0-only /* CAN driver for Geschwister Schneider USB/CAN devices * and bytewerk.org candleLight USB CAN interfaces. * * Copyright (C) 2013-2016 Geschwister Schneider Technologie-, * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt). * Copyright (C) 2016 Hubert Denkmair * Copyright (c) 2023 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de> * * Many thanks to all socketcan devs! */ #include <linux/bitfield.h> #include <linux/clocksource.h> #include <linux/ethtool.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/signal.h> #include <linux/timecounter.h> #include <linux/units.h> #include <linux/usb.h> #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/rx-offload.h> /* Device specific constants */ #define USB_GS_USB_1_VENDOR_ID 0x1d50 #define USB_GS_USB_1_PRODUCT_ID 0x606f #define USB_CANDLELIGHT_VENDOR_ID 0x1209 #define USB_CANDLELIGHT_PRODUCT_ID 0x2323 #define USB_CES_CANEXT_FD_VENDOR_ID 0x1cd2 #define USB_CES_CANEXT_FD_PRODUCT_ID 0x606f #define USB_ABE_CANDEBUGGER_FD_VENDOR_ID 0x16d0 #define USB_ABE_CANDEBUGGER_FD_PRODUCT_ID 0x10b8 #define USB_XYLANTA_SAINT3_VENDOR_ID 0x16d0 #define USB_XYLANTA_SAINT3_PRODUCT_ID 0x0f30 #define USB_CANNECTIVITY_VENDOR_ID 0x1209 #define USB_CANNECTIVITY_PRODUCT_ID 0xca01 /* Timestamp 32 bit timer runs at 1 MHz (1 µs tick). Worker accounts * for timer overflow (will be after ~71 minutes) */ #define GS_USB_TIMESTAMP_TIMER_HZ (1 * HZ_PER_MHZ) #define GS_USB_TIMESTAMP_WORK_DELAY_SEC 1800 static_assert(GS_USB_TIMESTAMP_WORK_DELAY_SEC < CYCLECOUNTER_MASK(32) / GS_USB_TIMESTAMP_TIMER_HZ / 2); /* Device specific constants */ enum gs_usb_breq { GS_USB_BREQ_HOST_FORMAT = 0, GS_USB_BREQ_BITTIMING, GS_USB_BREQ_MODE, GS_USB_BREQ_BERR, GS_USB_BREQ_BT_CONST, GS_USB_BREQ_DEVICE_CONFIG, GS_USB_BREQ_TIMESTAMP, GS_USB_BREQ_IDENTIFY, GS_USB_BREQ_GET_USER_ID, GS_USB_BREQ_QUIRK_CANTACT_PRO_DATA_BITTIMING = GS_USB_BREQ_GET_USER_ID, GS_USB_BREQ_SET_USER_ID, GS_USB_BREQ_DATA_BITTIMING, GS_USB_BREQ_BT_CONST_EXT, GS_USB_BREQ_SET_TERMINATION, GS_USB_BREQ_GET_TERMINATION, GS_USB_BREQ_GET_STATE, }; enum gs_can_mode { /* reset a channel. turns it off */ GS_CAN_MODE_RESET = 0, /* starts a channel */ GS_CAN_MODE_START }; enum gs_can_state { GS_CAN_STATE_ERROR_ACTIVE = 0, GS_CAN_STATE_ERROR_WARNING, GS_CAN_STATE_ERROR_PASSIVE, GS_CAN_STATE_BUS_OFF, GS_CAN_STATE_STOPPED, GS_CAN_STATE_SLEEPING }; enum gs_can_identify_mode { GS_CAN_IDENTIFY_OFF = 0, GS_CAN_IDENTIFY_ON }; enum gs_can_termination_state { GS_CAN_TERMINATION_STATE_OFF = 0, GS_CAN_TERMINATION_STATE_ON }; #define GS_USB_TERMINATION_DISABLED CAN_TERMINATION_DISABLED #define GS_USB_TERMINATION_ENABLED 120 /* data types passed between host and device */ /* The firmware on the original USB2CAN by Geschwister Schneider * Technologie Entwicklungs- und Vertriebs UG exchanges all data * between the host and the device in host byte order. This is done * with the struct gs_host_config::byte_order member, which is sent * first to indicate the desired byte order. * * The widely used open source firmware candleLight doesn't support * this feature and exchanges the data in little endian byte order.
/* Device specific constants */
enum gs_usb_breq {
	GS_USB_BREQ_HOST_FORMAT = 0,
	GS_USB_BREQ_BITTIMING,
	GS_USB_BREQ_MODE,
	GS_USB_BREQ_BERR,
	GS_USB_BREQ_BT_CONST,
	GS_USB_BREQ_DEVICE_CONFIG,
	GS_USB_BREQ_TIMESTAMP,
	GS_USB_BREQ_IDENTIFY,
	GS_USB_BREQ_GET_USER_ID,
	GS_USB_BREQ_QUIRK_CANTACT_PRO_DATA_BITTIMING = GS_USB_BREQ_GET_USER_ID,
	GS_USB_BREQ_SET_USER_ID,
	GS_USB_BREQ_DATA_BITTIMING,
	GS_USB_BREQ_BT_CONST_EXT,
	GS_USB_BREQ_SET_TERMINATION,
	GS_USB_BREQ_GET_TERMINATION,
	GS_USB_BREQ_GET_STATE,
};

enum gs_can_mode {
	/* reset a channel. turns it off */
	GS_CAN_MODE_RESET = 0,
	/* starts a channel */
	GS_CAN_MODE_START
};

enum gs_can_state {
	GS_CAN_STATE_ERROR_ACTIVE = 0,
	GS_CAN_STATE_ERROR_WARNING,
	GS_CAN_STATE_ERROR_PASSIVE,
	GS_CAN_STATE_BUS_OFF,
	GS_CAN_STATE_STOPPED,
	GS_CAN_STATE_SLEEPING
};

enum gs_can_identify_mode {
	GS_CAN_IDENTIFY_OFF = 0,
	GS_CAN_IDENTIFY_ON
};

enum gs_can_termination_state {
	GS_CAN_TERMINATION_STATE_OFF = 0,
	GS_CAN_TERMINATION_STATE_ON
};

#define GS_USB_TERMINATION_DISABLED CAN_TERMINATION_DISABLED
#define GS_USB_TERMINATION_ENABLED 120

/* data types passed between host and device */

/* The firmware on the original USB2CAN by Geschwister Schneider
 * Technologie Entwicklungs- und Vertriebs UG exchanges all data
 * between the host and the device in host byte order. This is done
 * with the struct gs_host_config::byte_order member, which is sent
 * first to indicate the desired byte order.
 *
 * The widely used open source firmware candleLight doesn't support
 * this feature and exchanges the data in little endian byte order.
 */
struct gs_host_config {
	__le32 byte_order;
} __packed;

struct gs_device_config {
	u8 reserved1;
	u8 reserved2;
	u8 reserved3;
	u8 icount;
	__le32 sw_version;
	__le32 hw_version;
} __packed;

#define GS_CAN_MODE_NORMAL 0
#define GS_CAN_MODE_LISTEN_ONLY BIT(0)
#define GS_CAN_MODE_LOOP_BACK BIT(1)
#define GS_CAN_MODE_TRIPLE_SAMPLE BIT(2)
#define GS_CAN_MODE_ONE_SHOT BIT(3)
#define GS_CAN_MODE_HW_TIMESTAMP BIT(4)
/* GS_CAN_FEATURE_IDENTIFY BIT(5) */
/* GS_CAN_FEATURE_USER_ID BIT(6) */
#define GS_CAN_MODE_PAD_PKTS_TO_MAX_PKT_SIZE BIT(7)
#define GS_CAN_MODE_FD BIT(8)
/* GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX BIT(9) */
/* GS_CAN_FEATURE_BT_CONST_EXT BIT(10) */
/* GS_CAN_FEATURE_TERMINATION BIT(11) */
#define GS_CAN_MODE_BERR_REPORTING BIT(12)
/* GS_CAN_FEATURE_GET_STATE BIT(13) */

struct gs_device_mode {
	__le32 mode;
	__le32 flags;
} __packed;

struct gs_device_state {
	__le32 state;
	__le32 rxerr;
	__le32 txerr;
} __packed;

struct gs_device_bittiming {
	__le32 prop_seg;
	__le32 phase_seg1;
	__le32 phase_seg2;
	__le32 sjw;
	__le32 brp;
} __packed;

struct gs_identify_mode {
	__le32 mode;
} __packed;

struct gs_device_termination_state {
	__le32 state;
} __packed;

#define GS_CAN_FEATURE_LISTEN_ONLY BIT(0)
#define GS_CAN_FEATURE_LOOP_BACK BIT(1)
#define GS_CAN_FEATURE_TRIPLE_SAMPLE BIT(2)
#define GS_CAN_FEATURE_ONE_SHOT BIT(3)
#define GS_CAN_FEATURE_HW_TIMESTAMP BIT(4)
#define GS_CAN_FEATURE_IDENTIFY BIT(5)
#define GS_CAN_FEATURE_USER_ID BIT(6)
#define GS_CAN_FEATURE_PAD_PKTS_TO_MAX_PKT_SIZE BIT(7)
#define GS_CAN_FEATURE_FD BIT(8)
#define GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX BIT(9)
#define GS_CAN_FEATURE_BT_CONST_EXT BIT(10)
#define GS_CAN_FEATURE_TERMINATION BIT(11)
#define GS_CAN_FEATURE_BERR_REPORTING BIT(12)
#define GS_CAN_FEATURE_GET_STATE BIT(13)
#define GS_CAN_FEATURE_MASK GENMASK(13, 0)

/* internal quirks - keep in GS_CAN_FEATURE space for now */

/* CANtact Pro original firmware:
 * BREQ DATA_BITTIMING overlaps with GET_USER_ID
 */
#define GS_CAN_FEATURE_QUIRK_BREQ_CANTACT_PRO BIT(31)

struct gs_device_bt_const {
	__le32 feature;
	__le32 fclk_can;
	__le32 tseg1_min;
	__le32 tseg1_max;
	__le32 tseg2_min;
	__le32 tseg2_max;
	__le32 sjw_max;
	__le32 brp_min;
	__le32 brp_max;
	__le32 brp_inc;
} __packed;

struct gs_device_bt_const_extended {
	__le32 feature;
	__le32 fclk_can;
	__le32 tseg1_min;
	__le32 tseg1_max;
	__le32 tseg2_min;
	__le32 tseg2_max;
	__le32 sjw_max;
	__le32 brp_min;
	__le32 brp_max;
	__le32 brp_inc;

	__le32 dtseg1_min;
	__le32 dtseg1_max;
	__le32 dtseg2_min;
	__le32 dtseg2_max;
	__le32 dsjw_max;
	__le32 dbrp_min;
	__le32 dbrp_max;
	__le32 dbrp_inc;
} __packed;

#define GS_CAN_FLAG_OVERFLOW BIT(0)
#define GS_CAN_FLAG_FD BIT(1)
#define GS_CAN_FLAG_BRS BIT(2)
#define GS_CAN_FLAG_ESI BIT(3)

struct classic_can {
	u8 data[8];
} __packed;

struct classic_can_ts {
	u8 data[8];
	__le32 timestamp_us;
} __packed;

struct classic_can_quirk {
	u8 data[8];
	u8 quirk;
} __packed;

struct canfd {
	u8 data[64];
} __packed;

struct canfd_ts {
	u8 data[64];
	__le32 timestamp_us;
} __packed;

struct canfd_quirk {
	u8 data[64];
	u8 quirk;
} __packed;

/* struct gs_host_frame::echo_id == GS_HOST_FRAME_ECHO_ID_RX indicates
 * a regular RX'ed CAN frame
 */
#define GS_HOST_FRAME_ECHO_ID_RX 0xffffffff

struct gs_host_frame {
	struct_group(header,
		u32 echo_id;
		__le32 can_id;

		u8 can_dlc;
		u8 channel;
		u8 flags;
		u8 reserved;
	);
	union {
		DECLARE_FLEX_ARRAY(struct classic_can, classic_can);
		DECLARE_FLEX_ARRAY(struct classic_can_ts, classic_can_ts);
		DECLARE_FLEX_ARRAY(struct classic_can_quirk, classic_can_quirk);
		DECLARE_FLEX_ARRAY(struct canfd, canfd);
		DECLARE_FLEX_ARRAY(struct canfd_ts, canfd_ts);
		DECLARE_FLEX_ARRAY(struct canfd_quirk, canfd_quirk);
	};
} __packed;
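/* For reference, the wire sizes that follow directly from the packed
 * structs above (12 byte header plus one union member):
 *
 *   struct_size(hf, classic_can, 1)    = 12 +  8     = 20 bytes
 *   struct_size(hf, classic_can_ts, 1) = 12 +  8 + 4 = 24 bytes
 *   struct_size(hf, canfd, 1)          = 12 + 64     = 76 bytes
 *   struct_size(hf, canfd_ts, 1)       = 12 + 64 + 4 = 80 bytes
 */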
/* The GS USB devices make use of the same flags and masks as in
 * linux/can.h and linux/can/error.h, and no additional mapping is necessary.
 */

/* Only send a max of GS_MAX_TX_URBS frames per channel at a time. */
#define GS_MAX_TX_URBS 10
/* Only launch a max of GS_MAX_RX_URBS usb requests at a time. */
#define GS_MAX_RX_URBS 30
#define GS_NAPI_WEIGHT 32

struct gs_tx_context {
	struct gs_can *dev;
	unsigned int echo_id;
};

struct gs_can {
	struct can_priv can; /* must be the first member */

	struct can_rx_offload offload;
	struct gs_usb *parent;

	struct net_device *netdev;
	struct usb_device *udev;

	struct can_bittiming_const bt_const, data_bt_const;
	unsigned int channel; /* channel number */

	u32 feature;
	unsigned int hf_size_tx;

	/* This lock prevents a race condition between xmit and receive. */
	spinlock_t tx_ctx_lock;
	struct gs_tx_context tx_context[GS_MAX_TX_URBS];

	struct usb_anchor tx_submitted;
	atomic_t active_tx_urbs;
};

/* usb interface struct */
struct gs_usb {
	struct usb_anchor rx_submitted;
	struct usb_device *udev;

	/* time counter for hardware timestamps */
	struct cyclecounter cc;
	struct timecounter tc;
	spinlock_t tc_lock; /* spinlock to guard access to tc->cycle_last */
	struct delayed_work timestamp;

	unsigned int hf_size_rx;
	u8 active_channels;
	u8 channel_cnt;

	unsigned int pipe_in;
	unsigned int pipe_out;

	struct gs_can *canch[] __counted_by(channel_cnt);
};

/* 'allocate' a tx context.
 * returns a valid tx context or NULL if there is no space.
 */
static struct gs_tx_context *gs_alloc_tx_context(struct gs_can *dev)
{
	int i = 0;
	unsigned long flags;

	spin_lock_irqsave(&dev->tx_ctx_lock, flags);

	for (; i < GS_MAX_TX_URBS; i++) {
		if (dev->tx_context[i].echo_id == GS_MAX_TX_URBS) {
			dev->tx_context[i].echo_id = i;
			spin_unlock_irqrestore(&dev->tx_ctx_lock, flags);
			return &dev->tx_context[i];
		}
	}

	spin_unlock_irqrestore(&dev->tx_ctx_lock, flags);
	return NULL;
}

/* releases a tx context */
static void gs_free_tx_context(struct gs_tx_context *txc)
{
	txc->echo_id = GS_MAX_TX_URBS;
}

/* Get a tx context by id. */
static struct gs_tx_context *gs_get_tx_context(struct gs_can *dev,
					       unsigned int id)
{
	unsigned long flags;

	if (id < GS_MAX_TX_URBS) {
		spin_lock_irqsave(&dev->tx_ctx_lock, flags);
		if (dev->tx_context[id].echo_id == id) {
			spin_unlock_irqrestore(&dev->tx_ctx_lock, flags);
			return &dev->tx_context[id];
		}
		spin_unlock_irqrestore(&dev->tx_ctx_lock, flags);
	}

	return NULL;
}
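/* Note on the convention used by the three helpers above: a context is
 * free iff its echo_id equals GS_MAX_TX_URBS, an otherwise impossible
 * value since valid ids are 0..GS_MAX_TX_URBS - 1. Allocation scans the
 * fixed array under tx_ctx_lock and stamps the slot with its own index,
 * so the echo_id doubles as the completion cookie the device echoes back.
 */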
static int gs_cmd_reset(struct gs_can *dev)
{
	struct gs_device_mode dm = {
		.mode = cpu_to_le32(GS_CAN_MODE_RESET),
	};

	return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_MODE,
				    USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				    dev->channel, 0, &dm, sizeof(dm), 1000,
				    GFP_KERNEL);
}

static inline int gs_usb_get_timestamp(const struct gs_usb *parent,
				       u32 *timestamp_p)
{
	__le32 timestamp;
	int rc;

	rc = usb_control_msg_recv(parent->udev, 0, GS_USB_BREQ_TIMESTAMP,
				  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  0, 0, &timestamp, sizeof(timestamp),
				  USB_CTRL_GET_TIMEOUT, GFP_KERNEL);
	if (rc)
		return rc;

	*timestamp_p = le32_to_cpu(timestamp);

	return 0;
}

static u64 gs_usb_timestamp_read(struct cyclecounter *cc) __must_hold(&dev->tc_lock)
{
	struct gs_usb *parent = container_of(cc, struct gs_usb, cc);
	u32 timestamp = 0;
	int err;

	lockdep_assert_held(&parent->tc_lock);

	/* drop lock for synchronous USB transfer */
	spin_unlock_bh(&parent->tc_lock);
	err = gs_usb_get_timestamp(parent, &timestamp);
	spin_lock_bh(&parent->tc_lock);
	if (err)
		dev_err(&parent->udev->dev,
			"Error %d while reading timestamp. HW timestamps may be inaccurate.",
			err);

	return timestamp;
}

static void gs_usb_timestamp_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct gs_usb *parent;

	parent = container_of(delayed_work, struct gs_usb, timestamp);
	spin_lock_bh(&parent->tc_lock);
	timecounter_read(&parent->tc);
	spin_unlock_bh(&parent->tc_lock);

	schedule_delayed_work(&parent->timestamp,
			      GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
}

static void gs_usb_skb_set_timestamp(struct gs_can *dev,
				     struct sk_buff *skb, u32 timestamp)
{
	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
	struct gs_usb *parent = dev->parent;
	u64 ns;

	spin_lock_bh(&parent->tc_lock);
	ns = timecounter_cyc2time(&parent->tc, timestamp);
	spin_unlock_bh(&parent->tc_lock);

	hwtstamps->hwtstamp = ns_to_ktime(ns);
}

static void gs_usb_timestamp_init(struct gs_usb *parent)
{
	struct cyclecounter *cc = &parent->cc;

	cc->read = gs_usb_timestamp_read;
	cc->mask = CYCLECOUNTER_MASK(32);
	cc->shift = 32 - bits_per(NSEC_PER_SEC / GS_USB_TIMESTAMP_TIMER_HZ);
	cc->mult = clocksource_hz2mult(GS_USB_TIMESTAMP_TIMER_HZ, cc->shift);

	spin_lock_init(&parent->tc_lock);
	spin_lock_bh(&parent->tc_lock);
	timecounter_init(&parent->tc, &parent->cc, ktime_get_real_ns());
	spin_unlock_bh(&parent->tc_lock);

	INIT_DELAYED_WORK(&parent->timestamp, gs_usb_timestamp_work);
	schedule_delayed_work(&parent->timestamp,
			      GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
}

static void gs_usb_timestamp_stop(struct gs_usb *parent)
{
	cancel_delayed_work_sync(&parent->timestamp);
}
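/* Worked numbers for gs_usb_timestamp_init() above: one timer tick is
 * NSEC_PER_SEC / GS_USB_TIMESTAMP_TIMER_HZ = 1000 ns, bits_per(1000) = 10,
 * so shift = 32 - 10 = 22 and mult = (NSEC_PER_SEC << 22) / 1000000 =
 * 1000 << 22, which still fits in 32 bit. The cycle-to-ns conversion is
 * then (cycles * (1000 << 22)) >> 22 = cycles * 1000 ns, i.e. exact.
 */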
static void gs_update_state(struct gs_can *dev, struct can_frame *cf)
{
	struct can_device_stats *can_stats = &dev->can.can_stats;

	if (cf->can_id & CAN_ERR_RESTARTED) {
		dev->can.state = CAN_STATE_ERROR_ACTIVE;
		can_stats->restarts++;
	} else if (cf->can_id & CAN_ERR_BUSOFF) {
		dev->can.state = CAN_STATE_BUS_OFF;
		can_stats->bus_off++;
	} else if (cf->can_id & CAN_ERR_CRTL) {
		if ((cf->data[1] & CAN_ERR_CRTL_TX_WARNING) ||
		    (cf->data[1] & CAN_ERR_CRTL_RX_WARNING)) {
			dev->can.state = CAN_STATE_ERROR_WARNING;
			can_stats->error_warning++;
		} else if ((cf->data[1] & CAN_ERR_CRTL_TX_PASSIVE) ||
			   (cf->data[1] & CAN_ERR_CRTL_RX_PASSIVE)) {
			dev->can.state = CAN_STATE_ERROR_PASSIVE;
			can_stats->error_passive++;
		} else {
			dev->can.state = CAN_STATE_ERROR_ACTIVE;
		}
	}
}

static u32 gs_usb_set_timestamp(struct gs_can *dev, struct sk_buff *skb,
				const struct gs_host_frame *hf)
{
	u32 timestamp;

	if (hf->flags & GS_CAN_FLAG_FD)
		timestamp = le32_to_cpu(hf->canfd_ts->timestamp_us);
	else
		timestamp = le32_to_cpu(hf->classic_can_ts->timestamp_us);

	if (skb)
		gs_usb_skb_set_timestamp(dev, skb, timestamp);

	return timestamp;
}

static void gs_usb_rx_offload(struct gs_can *dev, struct sk_buff *skb,
			      const struct gs_host_frame *hf)
{
	struct can_rx_offload *offload = &dev->offload;
	int rc;

	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) {
		const u32 ts = gs_usb_set_timestamp(dev, skb, hf);

		rc = can_rx_offload_queue_timestamp(offload, skb, ts);
	} else {
		rc = can_rx_offload_queue_tail(offload, skb);
	}

	if (rc)
		dev->netdev->stats.rx_fifo_errors++;
}

static unsigned int
gs_usb_get_echo_skb(struct gs_can *dev, struct sk_buff *skb,
		    const struct gs_host_frame *hf)
{
	struct can_rx_offload *offload = &dev->offload;
	const u32 echo_id = hf->echo_id;
	unsigned int len;

	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) {
		const u32 ts = gs_usb_set_timestamp(dev, skb, hf);

		len = can_rx_offload_get_echo_skb_queue_timestamp(offload, echo_id,
								  ts, NULL);
	} else {
		len = can_rx_offload_get_echo_skb_queue_tail(offload, echo_id,
							     NULL);
	}

	return len;
}

static unsigned int
gs_usb_get_minimum_rx_length(const struct gs_can *dev,
			     const struct gs_host_frame *hf,
			     unsigned int *data_length_p)
{
	unsigned int minimum_length, data_length = 0;

	if (hf->flags & GS_CAN_FLAG_FD) {
		if (hf->echo_id == GS_HOST_FRAME_ECHO_ID_RX)
			data_length = can_fd_dlc2len(hf->can_dlc);

		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
			/* timestamp follows data field of max size */
			minimum_length = struct_size(hf, canfd_ts, 1);
		else
			minimum_length = sizeof(hf->header) + data_length;
	} else {
		if (hf->echo_id == GS_HOST_FRAME_ECHO_ID_RX &&
		    !(hf->can_id & cpu_to_le32(CAN_RTR_FLAG)))
			data_length = can_cc_dlc2len(hf->can_dlc);

		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
			/* timestamp follows data field of max size */
			minimum_length = struct_size(hf, classic_can_ts, 1);
		else
			minimum_length = sizeof(hf->header) + data_length;
	}

	*data_length_p = data_length;

	return minimum_length;
}
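/* Example for the "timestamp follows data field of max size" case above:
 * with hardware timestamps enabled a classic CAN frame always occupies
 * struct_size(hf, classic_can_ts, 1) = 24 bytes on the wire, even for
 * dlc < 8, because the timestamp sits behind the full 8 byte data field.
 * Without timestamps only sizeof(header) + data_length bytes are required.
 */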
static void gs_usb_receive_bulk_callback(struct urb *urb)
{
	struct gs_usb *parent = urb->context;
	struct gs_can *dev;
	struct net_device *netdev;
	int rc;
	struct net_device_stats *stats;
	struct gs_host_frame *hf = urb->transfer_buffer;
	unsigned int minimum_length, data_length;
	struct gs_tx_context *txc;
	struct can_frame *cf;
	struct canfd_frame *cfd;
	struct sk_buff *skb;

	BUG_ON(!parent);

	switch (urb->status) {
	case 0: /* success */
		break;
	case -ENOENT:
	case -ESHUTDOWN:
		return;
	default:
		/* do not resubmit aborted urbs, e.g. when the device goes down */
		return;
	}

	minimum_length = sizeof(hf->header);
	if (urb->actual_length < minimum_length) {
		dev_err_ratelimited(&parent->udev->dev,
				    "short read (actual_length=%u, minimum_length=%u)\n",
				    urb->actual_length, minimum_length);
		goto resubmit_urb;
	}

	/* device reports out of range channel id */
	if (hf->channel >= parent->channel_cnt)
		goto device_detach;

	dev = parent->canch[hf->channel];

	netdev = dev->netdev;
	stats = &netdev->stats;

	if (!netif_device_present(netdev))
		return;

	if (!netif_running(netdev))
		goto resubmit_urb;

	minimum_length = gs_usb_get_minimum_rx_length(dev, hf, &data_length);
	if (urb->actual_length < minimum_length) {
		stats->rx_errors++;
		stats->rx_length_errors++;

		if (net_ratelimit())
			netdev_err(netdev,
				   "short read (actual_length=%u, minimum_length=%u)\n",
				   urb->actual_length, minimum_length);
		goto resubmit_urb;
	}

	if (hf->echo_id == GS_HOST_FRAME_ECHO_ID_RX) {
		/* normal rx */
		if (hf->flags & GS_CAN_FLAG_FD) {
			skb = alloc_canfd_skb(netdev, &cfd);
			if (!skb)
				return;

			cfd->can_id = le32_to_cpu(hf->can_id);
			cfd->len = data_length;
			if (hf->flags & GS_CAN_FLAG_BRS)
				cfd->flags |= CANFD_BRS;
			if (hf->flags & GS_CAN_FLAG_ESI)
				cfd->flags |= CANFD_ESI;

			memcpy(cfd->data, hf->canfd->data, data_length);
		} else {
			skb = alloc_can_skb(netdev, &cf);
			if (!skb)
				return;

			cf->can_id = le32_to_cpu(hf->can_id);
			can_frame_set_cc_len(cf, hf->can_dlc, dev->can.ctrlmode);

			memcpy(cf->data, hf->classic_can->data, data_length);

			/* ERROR frames tell us information about the controller */
			if (le32_to_cpu(hf->can_id) & CAN_ERR_FLAG)
				gs_update_state(dev, cf);
		}

		gs_usb_rx_offload(dev, skb, hf);
	} else { /* echo_id == hf->echo_id */
		if (hf->echo_id >= GS_MAX_TX_URBS) {
			netdev_err(netdev, "Unexpected out of range echo id %u\n",
				   hf->echo_id);
			goto resubmit_urb;
		}

		txc = gs_get_tx_context(dev, hf->echo_id);

		/* bad devices send bad echo_ids. */
		if (!txc) {
			netdev_err(netdev, "Unexpected unused echo id %u\n",
				   hf->echo_id);
			goto resubmit_urb;
		}

		skb = dev->can.echo_skb[hf->echo_id];
		stats->tx_packets++;
		stats->tx_bytes += gs_usb_get_echo_skb(dev, skb, hf);
		gs_free_tx_context(txc);

		atomic_dec(&dev->active_tx_urbs);

		netif_wake_queue(netdev);
	}

	if (hf->flags & GS_CAN_FLAG_OVERFLOW) {
		stats->rx_over_errors++;
		stats->rx_errors++;

		skb = alloc_can_err_skb(netdev, &cf);
		if (!skb)
			goto resubmit_urb;

		cf->can_id |= CAN_ERR_CRTL;
		cf->len = CAN_ERR_DLC;
		cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;

		gs_usb_rx_offload(dev, skb, hf);
	}

	can_rx_offload_irq_finish(&dev->offload);

resubmit_urb:
	usb_fill_bulk_urb(urb, parent->udev, parent->pipe_in,
			  hf, parent->hf_size_rx,
			  gs_usb_receive_bulk_callback, parent);

	usb_anchor_urb(urb, &parent->rx_submitted);
	rc = usb_submit_urb(urb, GFP_ATOMIC);

	/* on USB failure, take down all interfaces */
	if (rc == -ENODEV) {
device_detach:
		for (rc = 0; rc < parent->channel_cnt; rc++) {
			if (parent->canch[rc])
				netif_device_detach(parent->canch[rc]->netdev);
		}
	}
}
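/* Demultiplexing in the completion handler above: echo_id == 0xffffffff
 * (GS_HOST_FRAME_ECHO_ID_RX) marks a frame received from the bus; any
 * other value is the device echoing back one of our own transmissions,
 * which completes the matching tx context and loops the echo skb back
 * through the rx-offload machinery.
 */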
static int gs_usb_set_bittiming(struct net_device *netdev)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct can_bittiming *bt = &dev->can.bittiming;
	struct gs_device_bittiming dbt = {
		.prop_seg = cpu_to_le32(bt->prop_seg),
		.phase_seg1 = cpu_to_le32(bt->phase_seg1),
		.phase_seg2 = cpu_to_le32(bt->phase_seg2),
		.sjw = cpu_to_le32(bt->sjw),
		.brp = cpu_to_le32(bt->brp),
	};

	/* request bit timings */
	return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_BITTIMING,
				    USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				    dev->channel, 0, &dbt, sizeof(dbt), 1000,
				    GFP_KERNEL);
}

static int gs_usb_set_data_bittiming(struct net_device *netdev)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct can_bittiming *bt = &dev->can.fd.data_bittiming;
	struct gs_device_bittiming dbt = {
		.prop_seg = cpu_to_le32(bt->prop_seg),
		.phase_seg1 = cpu_to_le32(bt->phase_seg1),
		.phase_seg2 = cpu_to_le32(bt->phase_seg2),
		.sjw = cpu_to_le32(bt->sjw),
		.brp = cpu_to_le32(bt->brp),
	};
	u8 request = GS_USB_BREQ_DATA_BITTIMING;

	if (dev->feature & GS_CAN_FEATURE_QUIRK_BREQ_CANTACT_PRO)
		request = GS_USB_BREQ_QUIRK_CANTACT_PRO_DATA_BITTIMING;

	/* request data bit timings */
	return usb_control_msg_send(dev->udev, 0, request,
				    USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				    dev->channel, 0, &dbt, sizeof(dbt), 1000,
				    GFP_KERNEL);
}

static void gs_usb_xmit_callback(struct urb *urb)
{
	struct gs_tx_context *txc = urb->context;
	struct gs_can *dev = txc->dev;
	struct net_device *netdev = dev->netdev;

	if (!urb->status)
		return;

	if (urb->status != -ESHUTDOWN && net_ratelimit())
		netdev_info(netdev, "failed to xmit URB %u: %pe\n",
			    txc->echo_id, ERR_PTR(urb->status));

	netdev->stats.tx_dropped++;
	netdev->stats.tx_errors++;

	can_free_echo_skb(netdev, txc->echo_id, NULL);
	gs_free_tx_context(txc);
	atomic_dec(&dev->active_tx_urbs);
	netif_wake_queue(netdev);
}
static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
				     struct net_device *netdev)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct net_device_stats *stats = &dev->netdev->stats;
	struct urb *urb;
	struct gs_host_frame *hf;
	struct can_frame *cf;
	struct canfd_frame *cfd;
	int rc;
	unsigned int idx;
	struct gs_tx_context *txc;

	if (can_dev_dropped_skb(netdev, skb))
		return NETDEV_TX_OK;

	/* find an empty context to keep track of transmission */
	txc = gs_alloc_tx_context(dev);
	if (!txc)
		return NETDEV_TX_BUSY;

	/* create a URB, and a buffer for it */
	urb = usb_alloc_urb(0, GFP_ATOMIC);
	if (!urb)
		goto nomem_urb;

	hf = kmalloc(dev->hf_size_tx, GFP_ATOMIC);
	if (!hf)
		goto nomem_hf;

	idx = txc->echo_id;

	if (idx >= GS_MAX_TX_URBS) {
		netdev_err(netdev, "Invalid tx context %u\n", idx);
		goto badidx;
	}

	hf->echo_id = idx;
	hf->channel = dev->channel;
	hf->flags = 0;
	hf->reserved = 0;

	if (can_is_canfd_skb(skb)) {
		cfd = (struct canfd_frame *)skb->data;

		hf->can_id = cpu_to_le32(cfd->can_id);
		hf->can_dlc = can_fd_len2dlc(cfd->len);
		hf->flags |= GS_CAN_FLAG_FD;
		if (cfd->flags & CANFD_BRS)
			hf->flags |= GS_CAN_FLAG_BRS;
		if (cfd->flags & CANFD_ESI)
			hf->flags |= GS_CAN_FLAG_ESI;

		memcpy(hf->canfd->data, cfd->data, cfd->len);
	} else {
		cf = (struct can_frame *)skb->data;

		hf->can_id = cpu_to_le32(cf->can_id);
		hf->can_dlc = can_get_cc_dlc(cf, dev->can.ctrlmode);

		memcpy(hf->classic_can->data, cf->data, cf->len);
	}

	usb_fill_bulk_urb(urb, dev->udev, dev->parent->pipe_out, hf,
			  dev->hf_size_tx, gs_usb_xmit_callback, txc);

	urb->transfer_flags |= URB_FREE_BUFFER;
	usb_anchor_urb(urb, &dev->tx_submitted);

	can_put_echo_skb(skb, netdev, idx, 0);

	atomic_inc(&dev->active_tx_urbs);

	rc = usb_submit_urb(urb, GFP_ATOMIC);
	if (unlikely(rc)) { /* usb send failed */
		atomic_dec(&dev->active_tx_urbs);

		can_free_echo_skb(netdev, idx, NULL);
		gs_free_tx_context(txc);

		usb_unanchor_urb(urb);

		if (rc == -ENODEV) {
			netif_device_detach(netdev);
		} else {
			netdev_err(netdev, "usb_submit failed (err=%d)\n", rc);
			stats->tx_dropped++;
		}
	} else {
		/* Slow down tx path */
		if (atomic_read(&dev->active_tx_urbs) >= GS_MAX_TX_URBS)
			netif_stop_queue(netdev);
	}

	/* let usb core take care of this urb */
	usb_free_urb(urb);

	return NETDEV_TX_OK;

badidx:
	kfree(hf);
nomem_hf:
	usb_free_urb(urb);

nomem_urb:
	gs_free_tx_context(txc);
	dev_kfree_skb(skb);
	stats->tx_dropped++;

	return NETDEV_TX_OK;
}
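/* Flow control in gs_can_start_xmit() above: the queue is stopped once
 * active_tx_urbs reaches GS_MAX_TX_URBS (all echo ids in flight) and is
 * woken again from the URB completion handlers, so at most 10 TX URBs
 * per channel are outstanding at any time.
 */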
static int gs_can_open(struct net_device *netdev)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_usb *parent = dev->parent;
	struct gs_device_mode dm = {
		.mode = cpu_to_le32(GS_CAN_MODE_START),
	};
	struct gs_host_frame *hf;
	struct urb *urb = NULL;
	u32 ctrlmode;
	u32 flags = 0;
	int rc, i;

	rc = open_candev(netdev);
	if (rc)
		return rc;

	ctrlmode = dev->can.ctrlmode;
	if (ctrlmode & CAN_CTRLMODE_FD) {
		if (dev->feature & GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX)
			dev->hf_size_tx = struct_size(hf, canfd_quirk, 1);
		else
			dev->hf_size_tx = struct_size(hf, canfd, 1);
	} else {
		if (dev->feature & GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX)
			dev->hf_size_tx = struct_size(hf, classic_can_quirk, 1);
		else
			dev->hf_size_tx = struct_size(hf, classic_can, 1);
	}

	can_rx_offload_enable(&dev->offload);

	if (!parent->active_channels) {
		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
			gs_usb_timestamp_init(parent);

		for (i = 0; i < GS_MAX_RX_URBS; i++) {
			u8 *buf;

			/* alloc rx urb */
			urb = usb_alloc_urb(0, GFP_KERNEL);
			if (!urb) {
				rc = -ENOMEM;
				goto out_usb_kill_anchored_urbs;
			}

			/* alloc rx buffer */
			buf = kmalloc(dev->parent->hf_size_rx, GFP_KERNEL);
			if (!buf) {
				rc = -ENOMEM;
				goto out_usb_free_urb;
			}

			/* fill, anchor, and submit rx urb */
			usb_fill_bulk_urb(urb,
					  dev->udev,
					  dev->parent->pipe_in,
					  buf,
					  dev->parent->hf_size_rx,
					  gs_usb_receive_bulk_callback, parent);
			urb->transfer_flags |= URB_FREE_BUFFER;

			usb_anchor_urb(urb, &parent->rx_submitted);

			rc = usb_submit_urb(urb, GFP_KERNEL);
			if (rc) {
				if (rc == -ENODEV)
					netif_device_detach(dev->netdev);

				netdev_err(netdev,
					   "usb_submit_urb() failed, error %pe\n",
					   ERR_PTR(rc));

				goto out_usb_unanchor_urb;
			}

			/* Drop reference,
			 * USB core will take care of freeing it
			 */
			usb_free_urb(urb);
		}
	}

	/* flags */
	if (ctrlmode & CAN_CTRLMODE_LOOPBACK)
		flags |= GS_CAN_MODE_LOOP_BACK;

	if (ctrlmode & CAN_CTRLMODE_LISTENONLY)
		flags |= GS_CAN_MODE_LISTEN_ONLY;

	if (ctrlmode & CAN_CTRLMODE_3_SAMPLES)
		flags |= GS_CAN_MODE_TRIPLE_SAMPLE;

	if (ctrlmode & CAN_CTRLMODE_ONE_SHOT)
		flags |= GS_CAN_MODE_ONE_SHOT;

	if (ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
		flags |= GS_CAN_MODE_BERR_REPORTING;

	if (ctrlmode & CAN_CTRLMODE_FD)
		flags |= GS_CAN_MODE_FD;

	/* if hardware supports timestamps, enable it */
	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
		flags |= GS_CAN_MODE_HW_TIMESTAMP;

	/* finally start device */
	dev->can.state = CAN_STATE_ERROR_ACTIVE;
	dm.flags = cpu_to_le32(flags);
	rc = usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_MODE,
				  USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  dev->channel, 0, &dm, sizeof(dm), 1000,
				  GFP_KERNEL);
	if (rc) {
		netdev_err(netdev, "Couldn't start device (err=%d)\n", rc);
		dev->can.state = CAN_STATE_STOPPED;

		goto out_usb_kill_anchored_urbs;
	}

	parent->active_channels++;
	if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
		netif_start_queue(netdev);

	return 0;

out_usb_unanchor_urb:
	usb_unanchor_urb(urb);
out_usb_free_urb:
	usb_free_urb(urb);
out_usb_kill_anchored_urbs:
	if (!parent->active_channels) {
		usb_kill_anchored_urbs(&parent->rx_submitted);

		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
			gs_usb_timestamp_stop(parent);
	}

	can_rx_offload_disable(&dev->offload);
	close_candev(netdev);

	return rc;
}

static int gs_usb_get_state(const struct net_device *netdev,
			    struct can_berr_counter *bec,
			    enum can_state *state)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_device_state ds;
	int rc;

	rc = usb_control_msg_recv(dev->udev, 0, GS_USB_BREQ_GET_STATE,
				  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  dev->channel, 0,
				  &ds, sizeof(ds),
				  USB_CTRL_GET_TIMEOUT,
				  GFP_KERNEL);
	if (rc)
		return rc;

	if (le32_to_cpu(ds.state) >= CAN_STATE_MAX)
		return -EOPNOTSUPP;

	*state = le32_to_cpu(ds.state);
	bec->txerr = le32_to_cpu(ds.txerr);
	bec->rxerr = le32_to_cpu(ds.rxerr);

	return 0;
}

static int gs_usb_can_get_berr_counter(const struct net_device *netdev,
				       struct can_berr_counter *bec)
{
	enum can_state state;

	return gs_usb_get_state(netdev, bec, &state);
}

static int gs_can_close(struct net_device *netdev)
{
	int rc;
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_usb *parent = dev->parent;

	netif_stop_queue(netdev);

	/* Stop polling */
	parent->active_channels--;
	if (!parent->active_channels) {
		usb_kill_anchored_urbs(&parent->rx_submitted);

		if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
			gs_usb_timestamp_stop(parent);
	}

	/* Stop sending URBs */
	usb_kill_anchored_urbs(&dev->tx_submitted);
	atomic_set(&dev->active_tx_urbs, 0);

	dev->can.state = CAN_STATE_STOPPED;

	/* reset the device */
	gs_cmd_reset(dev);

	/* reset tx contexts */
	for (rc = 0; rc < GS_MAX_TX_URBS; rc++) {
		dev->tx_context[rc].dev = dev;
		dev->tx_context[rc].echo_id = GS_MAX_TX_URBS;
	}

	can_rx_offload_disable(&dev->offload);

	/* close the netdev */
	close_candev(netdev);

	return 0;
}

static int gs_can_hwtstamp_get(struct net_device *netdev,
			       struct kernel_hwtstamp_config *cfg)
{
	const struct gs_can *dev = netdev_priv(netdev);

	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
		return can_hwtstamp_get(netdev, cfg);

	return -EOPNOTSUPP;
}

static int gs_can_hwtstamp_set(struct net_device *netdev,
			       struct kernel_hwtstamp_config *cfg,
			       struct netlink_ext_ack *extack)
{
	const struct gs_can *dev = netdev_priv(netdev);

	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
		return can_hwtstamp_set(netdev, cfg, extack);

	return -EOPNOTSUPP;
}

static const struct net_device_ops gs_usb_netdev_ops = {
	.ndo_open = gs_can_open,
	.ndo_stop = gs_can_close,
	.ndo_start_xmit = gs_can_start_xmit,
	.ndo_hwtstamp_get = gs_can_hwtstamp_get,
	.ndo_hwtstamp_set = gs_can_hwtstamp_set,
};
static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_identify_mode imode;

	if (do_identify)
		imode.mode = cpu_to_le32(GS_CAN_IDENTIFY_ON);
	else
		imode.mode = cpu_to_le32(GS_CAN_IDENTIFY_OFF);

	return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_IDENTIFY,
				    USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				    dev->channel, 0, &imode, sizeof(imode), 100,
				    GFP_KERNEL);
}

/* blink LEDs to locate this interface */
static int gs_usb_set_phys_id(struct net_device *netdev,
			      enum ethtool_phys_id_state state)
{
	const struct gs_can *dev = netdev_priv(netdev);
	int rc = 0;

	if (!(dev->feature & GS_CAN_FEATURE_IDENTIFY))
		return -EOPNOTSUPP;

	switch (state) {
	case ETHTOOL_ID_ACTIVE:
		rc = gs_usb_set_identify(netdev, GS_CAN_IDENTIFY_ON);
		break;
	case ETHTOOL_ID_INACTIVE:
		rc = gs_usb_set_identify(netdev, GS_CAN_IDENTIFY_OFF);
		break;
	default:
		break;
	}

	return rc;
}

static int gs_usb_get_ts_info(struct net_device *netdev,
			      struct kernel_ethtool_ts_info *info)
{
	struct gs_can *dev = netdev_priv(netdev);

	/* report if device supports HW timestamps */
	if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
		return can_ethtool_op_get_ts_info_hwts(netdev, info);

	return ethtool_op_get_ts_info(netdev, info);
}

static const struct ethtool_ops gs_usb_ethtool_ops = {
	.set_phys_id = gs_usb_set_phys_id,
	.get_ts_info = gs_usb_get_ts_info,
};

static int gs_usb_get_termination(struct net_device *netdev, u16 *term)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_device_termination_state term_state;
	int rc;

	rc = usb_control_msg_recv(dev->udev, 0, GS_USB_BREQ_GET_TERMINATION,
				  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  dev->channel, 0,
				  &term_state, sizeof(term_state), 1000,
				  GFP_KERNEL);
	if (rc)
		return rc;

	if (term_state.state == cpu_to_le32(GS_CAN_TERMINATION_STATE_ON))
		*term = GS_USB_TERMINATION_ENABLED;
	else
		*term = GS_USB_TERMINATION_DISABLED;

	return 0;
}

static int gs_usb_set_termination(struct net_device *netdev, u16 term)
{
	struct gs_can *dev = netdev_priv(netdev);
	struct gs_device_termination_state term_state;

	if (term == GS_USB_TERMINATION_ENABLED)
		term_state.state = cpu_to_le32(GS_CAN_TERMINATION_STATE_ON);
	else
		term_state.state = cpu_to_le32(GS_CAN_TERMINATION_STATE_OFF);

	return usb_control_msg_send(dev->udev, 0, GS_USB_BREQ_SET_TERMINATION,
				    USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				    dev->channel, 0,
				    &term_state, sizeof(term_state), 1000,
				    GFP_KERNEL);
}

static const u16 gs_usb_termination_const[] = {
	GS_USB_TERMINATION_DISABLED,
	GS_USB_TERMINATION_ENABLED
};
static struct gs_can *gs_make_candev(unsigned int channel,
				     struct usb_interface *intf,
				     struct gs_device_config *dconf)
{
	struct gs_can *dev;
	struct net_device *netdev;
	int rc;
	struct gs_device_bt_const_extended bt_const_extended;
	struct gs_device_bt_const bt_const;
	u32 feature;

	/* fetch bit timing constants */
	rc = usb_control_msg_recv(interface_to_usbdev(intf), 0,
				  GS_USB_BREQ_BT_CONST,
				  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  channel, 0, &bt_const, sizeof(bt_const), 1000,
				  GFP_KERNEL);
	if (rc) {
		dev_err(&intf->dev,
			"Couldn't get bit timing const for channel %d (%pe)\n",
			channel, ERR_PTR(rc));
		return ERR_PTR(rc);
	}

	/* create netdev */
	netdev = alloc_candev(sizeof(struct gs_can), GS_MAX_TX_URBS);
	if (!netdev) {
		dev_err(&intf->dev, "Couldn't allocate candev\n");
		return ERR_PTR(-ENOMEM);
	}

	dev = netdev_priv(netdev);

	netdev->netdev_ops = &gs_usb_netdev_ops;
	netdev->ethtool_ops = &gs_usb_ethtool_ops;

	netdev->flags |= IFF_ECHO; /* we support full roundtrip echo */
	netdev->dev_id = channel;
	netdev->dev_port = channel;

	/* dev setup */
	strcpy(dev->bt_const.name, KBUILD_MODNAME);
	dev->bt_const.tseg1_min = le32_to_cpu(bt_const.tseg1_min);
	dev->bt_const.tseg1_max = le32_to_cpu(bt_const.tseg1_max);
	dev->bt_const.tseg2_min = le32_to_cpu(bt_const.tseg2_min);
	dev->bt_const.tseg2_max = le32_to_cpu(bt_const.tseg2_max);
	dev->bt_const.sjw_max = le32_to_cpu(bt_const.sjw_max);
	dev->bt_const.brp_min = le32_to_cpu(bt_const.brp_min);
	dev->bt_const.brp_max = le32_to_cpu(bt_const.brp_max);
	dev->bt_const.brp_inc = le32_to_cpu(bt_const.brp_inc);

	dev->udev = interface_to_usbdev(intf);
	dev->netdev = netdev;
	dev->channel = channel;

	init_usb_anchor(&dev->tx_submitted);
	atomic_set(&dev->active_tx_urbs, 0);
	spin_lock_init(&dev->tx_ctx_lock);
	for (rc = 0; rc < GS_MAX_TX_URBS; rc++) {
		dev->tx_context[rc].dev = dev;
		dev->tx_context[rc].echo_id = GS_MAX_TX_URBS;
	}

	/* can setup */
	dev->can.state = CAN_STATE_STOPPED;
	dev->can.clock.freq = le32_to_cpu(bt_const.fclk_can);
	dev->can.bittiming_const = &dev->bt_const;
	dev->can.do_set_bittiming = gs_usb_set_bittiming;

	dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC;

	feature = le32_to_cpu(bt_const.feature);
	dev->feature = FIELD_GET(GS_CAN_FEATURE_MASK, feature);
	if (feature & GS_CAN_FEATURE_LISTEN_ONLY)
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;

	if (feature & GS_CAN_FEATURE_LOOP_BACK)
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK;

	if (feature & GS_CAN_FEATURE_TRIPLE_SAMPLE)
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;

	if (feature & GS_CAN_FEATURE_ONE_SHOT)
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT;

	if (feature & GS_CAN_FEATURE_FD) {
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_FD;
		/* The data bit timing will be overwritten, if
		 * GS_CAN_FEATURE_BT_CONST_EXT is set.
		 */
		dev->can.fd.data_bittiming_const = &dev->bt_const;
		dev->can.fd.do_set_data_bittiming = gs_usb_set_data_bittiming;
	}

	if (feature & GS_CAN_FEATURE_TERMINATION) {
		rc = gs_usb_get_termination(netdev, &dev->can.termination);
		if (rc) {
			dev->feature &= ~GS_CAN_FEATURE_TERMINATION;

			dev_info(&intf->dev,
				 "Disabling termination support for channel %d (%pe)\n",
				 channel, ERR_PTR(rc));
		} else {
			dev->can.termination_const = gs_usb_termination_const;
			dev->can.termination_const_cnt = ARRAY_SIZE(gs_usb_termination_const);
			dev->can.do_set_termination = gs_usb_set_termination;
		}
	}

	if (feature & GS_CAN_FEATURE_BERR_REPORTING)
		dev->can.ctrlmode_supported |= CAN_CTRLMODE_BERR_REPORTING;

	if (feature & GS_CAN_FEATURE_GET_STATE)
		dev->can.do_get_berr_counter = gs_usb_can_get_berr_counter;

	/* The CANtact Pro from LinkLayer Labs is based on the
	 * LPC54616 µC, which is affected by the NXP LPC USB transfer
	 * erratum. However, the current firmware (version 2) doesn't
	 * set the GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX bit. Set the
	 * feature GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX to workaround
	 * this issue.
	 *
	 * For the GS_USB_BREQ_DATA_BITTIMING USB control message the
	 * CANtact Pro firmware uses a request value, which is already
	 * used by the candleLight firmware for a different purpose
	 * (GS_USB_BREQ_GET_USER_ID). Set the feature
	 * GS_CAN_FEATURE_QUIRK_BREQ_CANTACT_PRO to workaround this
	 * issue.
	 */
	if (dev->udev->descriptor.idVendor == cpu_to_le16(USB_GS_USB_1_VENDOR_ID) &&
	    dev->udev->descriptor.idProduct == cpu_to_le16(USB_GS_USB_1_PRODUCT_ID) &&
	    dev->udev->manufacturer && dev->udev->product &&
	    !strcmp(dev->udev->manufacturer, "LinkLayer Labs") &&
	    !strcmp(dev->udev->product, "CANtact Pro") &&
	    (le32_to_cpu(dconf->sw_version) <= 2))
		dev->feature |= GS_CAN_FEATURE_REQ_USB_QUIRK_LPC546XX |
			GS_CAN_FEATURE_QUIRK_BREQ_CANTACT_PRO;

	/* GS_CAN_FEATURE_IDENTIFY is only supported for sw_version > 1 */
	if (!(le32_to_cpu(dconf->sw_version) > 1 &&
	      feature & GS_CAN_FEATURE_IDENTIFY))
		dev->feature &= ~GS_CAN_FEATURE_IDENTIFY;

	/* fetch extended bit timing constants if device has feature
	 * GS_CAN_FEATURE_FD and GS_CAN_FEATURE_BT_CONST_EXT
	 */
	if (feature & GS_CAN_FEATURE_FD &&
	    feature & GS_CAN_FEATURE_BT_CONST_EXT) {
		rc = usb_control_msg_recv(interface_to_usbdev(intf), 0,
					  GS_USB_BREQ_BT_CONST_EXT,
					  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
					  channel, 0, &bt_const_extended,
					  sizeof(bt_const_extended),
					  1000, GFP_KERNEL);
		if (rc) {
			dev_err(&intf->dev,
				"Couldn't get extended bit timing const for channel %d (%pe)\n",
				channel, ERR_PTR(rc));
			goto out_free_candev;
		}

		strcpy(dev->data_bt_const.name, KBUILD_MODNAME);
		dev->data_bt_const.tseg1_min = le32_to_cpu(bt_const_extended.dtseg1_min);
		dev->data_bt_const.tseg1_max = le32_to_cpu(bt_const_extended.dtseg1_max);
		dev->data_bt_const.tseg2_min = le32_to_cpu(bt_const_extended.dtseg2_min);
		dev->data_bt_const.tseg2_max = le32_to_cpu(bt_const_extended.dtseg2_max);
		dev->data_bt_const.sjw_max = le32_to_cpu(bt_const_extended.dsjw_max);
		dev->data_bt_const.brp_min = le32_to_cpu(bt_const_extended.dbrp_min);
		dev->data_bt_const.brp_max = le32_to_cpu(bt_const_extended.dbrp_max);
		dev->data_bt_const.brp_inc = le32_to_cpu(bt_const_extended.dbrp_inc);

		dev->can.fd.data_bittiming_const = &dev->data_bt_const;
	}

	can_rx_offload_add_manual(netdev, &dev->offload, GS_NAPI_WEIGHT);
	SET_NETDEV_DEV(netdev, &intf->dev);

	rc = register_candev(dev->netdev);
	if (rc) {
		dev_err(&intf->dev,
			"Couldn't register candev for channel %d (%pe)\n",
			channel, ERR_PTR(rc));
		goto out_can_rx_offload_del;
	}

	return dev;

out_can_rx_offload_del:
	can_rx_offload_del(&dev->offload);
out_free_candev:
	free_candev(dev->netdev);

	return ERR_PTR(rc);
}

static void gs_destroy_candev(struct gs_can *dev)
{
	unregister_candev(dev->netdev);
	can_rx_offload_del(&dev->offload);
	free_candev(dev->netdev);
}

static int gs_usb_probe(struct usb_interface *intf,
			const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	struct usb_endpoint_descriptor *ep_in, *ep_out;
	struct gs_host_frame *hf;
	struct gs_usb *parent;
	struct gs_host_config hconf = {
		.byte_order = cpu_to_le32(0x0000beef),
	};
	struct gs_device_config dconf;
	unsigned int icount, i;
	int rc;

	rc = usb_find_common_endpoints(intf->cur_altsetting,
				       &ep_in, &ep_out, NULL, NULL);
	if (rc) {
		dev_err(&intf->dev, "Required endpoints not found\n");
		return rc;
	}

	/* send host config */
	rc = usb_control_msg_send(udev, 0, GS_USB_BREQ_HOST_FORMAT,
				  USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  1, intf->cur_altsetting->desc.bInterfaceNumber,
				  &hconf, sizeof(hconf), 1000,
				  GFP_KERNEL);
	if (rc) {
		dev_err(&intf->dev, "Couldn't send data format (err=%d)\n", rc);
		return rc;
	}

	/* read device config */
	rc = usb_control_msg_recv(udev, 0, GS_USB_BREQ_DEVICE_CONFIG,
				  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
				  1, intf->cur_altsetting->desc.bInterfaceNumber,
				  &dconf, sizeof(dconf), 1000,
				  GFP_KERNEL);
	if (rc) {
		dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n",
			rc);
		return rc;
	}

	icount = dconf.icount + 1;
	dev_info(&intf->dev, "Configuring for %u interfaces\n", icount);

	if (icount > type_max(parent->channel_cnt)) {
		dev_err(&intf->dev,
			"Driver cannot handle more than %u CAN interfaces\n",
			type_max(parent->channel_cnt));
		return -EINVAL;
	}

	parent = kzalloc(struct_size(parent, canch, icount), GFP_KERNEL);
	if (!parent)
		return -ENOMEM;

	parent->channel_cnt = icount;

	init_usb_anchor(&parent->rx_submitted);

	usb_set_intfdata(intf, parent);
	parent->udev = udev;

	/* store the detected endpoints */
	parent->pipe_in = usb_rcvbulkpipe(parent->udev, ep_in->bEndpointAddress);
	parent->pipe_out = usb_sndbulkpipe(parent->udev, ep_out->bEndpointAddress);

	for (i = 0; i < icount; i++) {
		unsigned int hf_size_rx = 0;

		parent->canch[i] = gs_make_candev(i, intf, &dconf);
		if (IS_ERR_OR_NULL(parent->canch[i])) {
			/* save error code to return later */
			rc = PTR_ERR(parent->canch[i]);

			/* on failure destroy previously created candevs */
			icount = i;
			for (i = 0; i < icount; i++)
				gs_destroy_candev(parent->canch[i]);

			usb_kill_anchored_urbs(&parent->rx_submitted);
			kfree(parent);
			return rc;
		}
		parent->canch[i]->parent = parent;

		/* set RX packet size based on FD and if hardware
		 * timestamps are supported.
		 */
		if (parent->canch[i]->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
			if (parent->canch[i]->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
				hf_size_rx = struct_size(hf, canfd_ts, 1);
			else
				hf_size_rx = struct_size(hf, canfd, 1);
		} else {
			if (parent->canch[i]->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
				hf_size_rx = struct_size(hf, classic_can_ts, 1);
			else
				hf_size_rx = struct_size(hf, classic_can, 1);
		}
		parent->hf_size_rx = max(parent->hf_size_rx, hf_size_rx);
	}

	return 0;
}

static void gs_usb_disconnect(struct usb_interface *intf)
{
	struct gs_usb *parent = usb_get_intfdata(intf);
	unsigned int i;

	usb_set_intfdata(intf, NULL);

	if (!parent) {
		dev_err(&intf->dev, "Disconnect (nodata)\n");
		return;
	}

	for (i = 0; i < parent->channel_cnt; i++)
		if (parent->canch[i])
			gs_destroy_candev(parent->canch[i]);

	kfree(parent);
}

static const struct usb_device_id gs_usb_table[] = {
	{ USB_DEVICE_INTERFACE_NUMBER(USB_GS_USB_1_VENDOR_ID,
				      USB_GS_USB_1_PRODUCT_ID, 0) },
	{ USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID,
				      USB_CANDLELIGHT_PRODUCT_ID, 0) },
	{ USB_DEVICE_INTERFACE_NUMBER(USB_CES_CANEXT_FD_VENDOR_ID,
				      USB_CES_CANEXT_FD_PRODUCT_ID, 0) },
	{ USB_DEVICE_INTERFACE_NUMBER(USB_ABE_CANDEBUGGER_FD_VENDOR_ID,
				      USB_ABE_CANDEBUGGER_FD_PRODUCT_ID, 0) },
	{ USB_DEVICE_INTERFACE_NUMBER(USB_XYLANTA_SAINT3_VENDOR_ID,
				      USB_XYLANTA_SAINT3_PRODUCT_ID, 0) },
	{ USB_DEVICE_INTERFACE_NUMBER(USB_CANNECTIVITY_VENDOR_ID,
				      USB_CANNECTIVITY_PRODUCT_ID, 0) },
	{} /* Terminating entry */
};

MODULE_DEVICE_TABLE(usb, gs_usb_table);

static struct usb_driver gs_usb_driver = {
	.name = KBUILD_MODNAME,
	.probe = gs_usb_probe,
	.disconnect = gs_usb_disconnect,
	.id_table = gs_usb_table,
};

module_usb_driver(gs_usb_driver);

MODULE_AUTHOR("Maximilian Schneider <mws@schneidersoft.net>");
MODULE_DESCRIPTION(
"Socket CAN device driver for Geschwister Schneider Technologie-, "
"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n"
"and bytewerk.org candleLight USB CAN interfaces.");
MODULE_LICENSE("GPL v2");
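/* Hedged usage sketch, not part of the driver itself: once a supported
 * device is plugged in and this module is loaded, each channel appears
 * as a SocketCAN network device. Assuming it enumerated as can0, a
 * typical bring-up with iproute2 and can-utils looks like:
 *
 *   ip link set can0 type can bitrate 500000
 *   ip link set can0 up
 *   candump can0
 */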
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains glue code.
 *
 * Copyright (c) 2009 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 */

#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
#include <crypto/utils.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/unaligned.h>

asmlinkage void clmul_ghash_mul(char *dst, const le128 *shash);

asmlinkage int clmul_ghash_update(char *dst, const char *src,
				  unsigned int srclen, const le128 *shash);

struct x86_ghash_ctx {
	le128 shash;
};

static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *key, unsigned int keylen)
{
	struct x86_ghash_ctx *ctx = crypto_shash_ctx(tfm);
	u64 a, b;

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/*
	 * GHASH maps bits to polynomial coefficients backwards, which makes it
	 * hard to implement. But it can be shown that the GHASH multiplication
	 *
	 *	D * K (mod x^128 + x^7 + x^2 + x + 1)
	 *
	 * (where D is a data block and K is the key) is equivalent to:
	 *
	 *	bitreflect(D) * bitreflect(K) * x^(-127)
	 *		(mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * So, the code below precomputes:
	 *
	 *	bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * ... but in Montgomery form (so that Montgomery multiplication can be
	 * used), i.e. with an extra x^128 factor, which means actually:
	 *
	 *	bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
	 *
	 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
	 * reflection, and thus bitreflect() is actually a byteswap.
	 */
	a = get_unaligned_be64(key);
	b = get_unaligned_be64(key + 8);
	ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
	ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));