/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H

#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>

/*
 * Internal slab definitions
 */

#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
#undef system_has_freelist_aba
#endif

/*
 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
 * problems with cmpxchg of just a pointer.
 */
typedef union {
	struct {
		void *freelist;
		unsigned long counter;
	};
	freelist_full_t full;
} freelist_aba_t;

/* Reuses the bits in struct page */
struct slab {
	unsigned long __page_flags;

	struct kmem_cache *slab_cache;
	union {
		struct {
			union {
				struct list_head slab_list;
#ifdef CONFIG_SLUB_CPU_PARTIAL
				struct {
					struct slab *next;
					int slabs;	/* Nr of slabs left */
				};
#endif
			};
			/* Double-word boundary */
			union {
				struct {
					void *freelist;		/* first free object */
					union {
						unsigned long counters;
						struct {
							unsigned inuse:16;
							unsigned objects:15;
							/*
							 * If slab debugging is enabled then the
							 * frozen bit can be reused to indicate
							 * that the slab was corrupted
							 */
							unsigned frozen:1;
						};
					};
				};
#ifdef system_has_freelist_aba
				freelist_aba_t freelist_counter;
#endif
			};
		};
		struct rcu_head rcu_head;
	};

	unsigned int __page_type;
	atomic_t __page_refcount;
#ifdef CONFIG_SLAB_OBJ_EXT
	unsigned long obj_exts;
#endif
};

#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, obj_exts);
#elif defined(CONFIG_SLAB_OBJ_EXT)
SLAB_MATCH(_unused_slab_obj_exts, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif
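The freelist_aba_t union above lets the allocator compare-and-swap the freelist head and a counter as one double-width value, so a pointer that was freed and reallocated in between no longer passes the compare. A minimal userspace sketch of the same idea, not kernel code: a 32-bit index stands in for the pointer and a 32-bit generation counter is bumped on every update, both packed into one atomic 64-bit word.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Pack a 32-bit "pointer" (here: an index) with a generation counter,
 * mirroring freelist_aba_t's {freelist, counter} / full overlay. */
typedef union {
	struct {
		uint32_t head;    /* stand-in for the freelist pointer */
		uint32_t counter; /* bumped on every successful update */
	};
	uint64_t full;
} aba_pair_t;

static _Atomic uint64_t slot;

/* Swap the head only if neither the head nor the counter changed, so a
 * pop-push-pop ABA sequence is caught by the counter mismatch. */
static int try_update(uint32_t old_head, uint32_t new_head)
{
	aba_pair_t old = { .full = atomic_load(&slot) };
	aba_pair_t new;

	if (old.head != old_head)
		return 0;
	new.head = new_head;
	new.counter = old.counter + 1;
	return atomic_compare_exchange_strong(&slot, &old.full, new.full);
}

int main(void)
{
	atomic_store(&slot, 0);
	printf("updated: %d\n", try_update(0, 42)); /* succeeds */
	printf("updated: %d\n", try_update(0, 7));  /* fails: head moved */
	return 0;
}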
/**
 * folio_slab - Converts from folio to slab.
 * @folio: The folio.
 *
 * Currently struct slab is a different representation of a folio where
 * folio_test_slab() is true.
 *
 * Return: The slab which contains this folio.
 */
#define folio_slab(folio)	(_Generic((folio),			\
	const struct folio *:	(const struct slab *)(folio),		\
	struct folio *:		(struct slab *)(folio)))

/**
 * slab_folio - The folio allocated for a slab
 * @s: The slab.
 *
 * Slabs are allocated as folios that contain the individual objects and are
 * using some fields in the first struct page of the folio - those fields are
 * now accessed by struct slab. It is occasionally necessary to convert back to
 * a folio in order to communicate with the rest of the mm. Please use this
 * helper function instead of casting yourself, as the implementation may
 * change in the future.
 */
#define slab_folio(s)		(_Generic((s),				\
	const struct slab *:	(const struct folio *)s,		\
	struct slab *:		(struct folio *)s))

/**
 * page_slab - Converts from first struct page to slab.
 * @p: The first (either head of compound or single) page of slab.
 *
 * A temporary wrapper to convert struct page to struct slab in situations
 * where we know the page is the compound head, or single order-0 page.
 *
 * Long-term ideally everything would work with struct slab directly or go
 * through folio to struct slab.
 *
 * Return: The slab which contains this page
 */
#define page_slab(p)		(_Generic((p),				\
	const struct page *:	(const struct slab *)(p),		\
	struct page *:		(struct slab *)(p)))

/**
 * slab_page - The first struct page allocated for a slab
 * @s: The slab.
 *
 * A convenience wrapper for converting slab to the first struct page of the
 * underlying folio, to communicate with code not yet converted to folio or
 * struct slab.
 */
#define slab_page(s) folio_page(slab_folio(s), 0)

/*
 * If network-based swap is enabled, sl*b must keep track of whether pages
 * were allocated from pfmemalloc reserves.
 */
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
	return folio_test_active(slab_folio(slab));
}

static inline void slab_set_pfmemalloc(struct slab *slab)
{
	folio_set_active(slab_folio(slab));
}

static inline void slab_clear_pfmemalloc(struct slab *slab)
{
	folio_clear_active(slab_folio(slab));
}

static inline void __slab_clear_pfmemalloc(struct slab *slab)
{
	__folio_clear_active(slab_folio(slab));
}

static inline void *slab_address(const struct slab *slab)
{
	return folio_address(slab_folio(slab));
}

static inline int slab_nid(const struct slab *slab)
{
	return folio_nid(slab_folio(slab));
}

static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
	return folio_pgdat(slab_folio(slab));
}

static inline struct slab *virt_to_slab(const void *addr)
{
	struct folio *folio = virt_to_folio(addr);

	if (!folio_test_slab(folio))
		return NULL;

	return folio_slab(folio);
}

static inline int slab_order(const struct slab *slab)
{
	return folio_order(slab_folio(slab));
}

static inline size_t slab_size(const struct slab *slab)
{
	return PAGE_SIZE << slab_order(slab);
}

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)			((c)->partial)

#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)	READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL
#define slub_set_percpu_partial(c, p)
#define slub_percpu_partial_read_once(c)	NULL
#endif // CONFIG_SLUB_CPU_PARTIAL

/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned int x;
};

/*
 * Slab cache management.
 */
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
	struct kmem_cache_cpu __percpu *cpu_slab;
#endif
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
	unsigned long min_partial;
	unsigned int size;		/* Object size including metadata */
	unsigned int object_size;	/* Object size without metadata */
	struct reciprocal_value reciprocal_size;
	unsigned int offset;		/* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/* Number of per cpu partial objects to keep around */
	unsigned int cpu_partial;
	/* Number of per cpu partial slabs to keep around */
	unsigned int cpu_partial_slabs;
#endif
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
	struct kmem_cache_order_objects min;
	gfp_t allocflags;		/* gfp flags to use on each alloc */
	int refcount;			/* Refcount for slab cache destroy */
	void (*ctor)(void *object);	/* Object constructor */
	unsigned int inuse;		/* Offset to metadata */
	unsigned int align;		/* Alignment */
	unsigned int red_left_pad;	/* Left redzone padding size */
	const char *name;		/* Name (only for display!) */
	struct list_head list;		/* List of slab caches */
#ifdef CONFIG_SYSFS
	struct kobject kobj;		/* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	unsigned long random;
#endif

#ifdef CONFIG_NUMA
	/*
	 * Defragmentation by allocating from a remote node.
	 */
	unsigned int remote_node_defrag_ratio;
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
	unsigned int *random_seq;
#endif

#ifdef CONFIG_KASAN_GENERIC
	struct kasan_cache kasan_info;
#endif

#ifdef CONFIG_HARDENED_USERCOPY
	unsigned int useroffset;	/* Usercopy region offset */
	unsigned int usersize;		/* Usercopy region size */
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
};

#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS 1
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif

void *fixup_red_left(struct kmem_cache *s, void *p);

static inline void *nearest_obj(struct kmem_cache *cache,
				const struct slab *slab, void *x)
{
	void *object = x - (x - slab_address(slab)) % cache->size;
	void *last_object = slab_address(slab) +
		(slab->objects - 1) * cache->size;
	void *result = (unlikely(object > last_object)) ? last_object : object;

	result = fixup_red_left(cache, result);
	return result;
}

/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
					  void *addr, void *obj)
{
	return reciprocal_divide(kasan_reset_tag(obj) - addr,
				 cache->reciprocal_size);
}

static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	if (is_kfence_address(obj))
		return 0;
	return __obj_to_index(cache, slab_address(slab), obj);
}

static inline int objs_per_slab(const struct kmem_cache *cache,
				const struct slab *slab)
{
	return slab->objects;
}
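obj_to_index() above maps an object's address to its slot number by dividing the offset from the slab base by the object size; reciprocal_divide() performs that division as a precomputed multiply-and-shift. A rough standalone model of the index math, using plain division and hypothetical names, not the kernel helpers:

#include <stdint.h>
#include <stdio.h>

/* Model: objects of obj_size bytes packed back to back from slab_base.
 * The kernel replaces the division with reciprocal_divide() on the
 * precomputed cache->reciprocal_size; the result is identical. */
static unsigned int obj_index(uintptr_t slab_base, unsigned int obj_size,
			      uintptr_t obj)
{
	return (unsigned int)((obj - slab_base) / obj_size);
}

int main(void)
{
	uintptr_t base = 0x1000;

	/* third object of a 64-byte cache lands at index 2 */
	printf("index = %u\n", obj_index(base, 64, base + 2 * 64));
	return 0;
}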
/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(void);

extern u8 kmalloc_size_index[24];

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 *
 * This assumes size is larger than zero and not larger than
 * KMALLOC_MAX_CACHE_SIZE and the caller must check that.
 */
static inline struct kmem_cache *
kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
	unsigned int index;

	if (!b)
		b = &kmalloc_caches[kmalloc_type(flags, caller)];
	if (size <= 192)
		index = kmalloc_size_index[size_index_elem(size)];
	else
		index = fls(size - 1);

	return (*b)[index];
}

gfp_t kmalloc_fix_flags(gfp_t flags);
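For sizes above 192, kmalloc_slab() above selects the bucket index as fls(size - 1), i.e. the next power-of-two cache; smaller sizes go through the kmalloc_size_index table in 8-byte steps. A small userspace sketch of the power-of-two path, with __builtin_clz standing in for the kernel's fls():

#include <stdio.h>

/* fls(x): position of the most significant set bit, 1-based (0 for 0),
 * modelled on the kernel helper of the same name. */
static int fls_model(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	/* For sizes above 192, the index fls(size - 1) picks the
	 * kmalloc-2^index cache, i.e. the next power of two. */
	unsigned int sizes[] = { 193, 256, 300, 4096 };

	for (unsigned int i = 0; i < 4; i++)
		printf("size %4u -> kmalloc-%u\n", sizes[i],
		       1u << fls_model(sizes[i] - 1));
	return 0;
}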
/* Functions provided by the slab allocators */
int do_kmem_cache_create(struct kmem_cache *s, const char *name,
			 unsigned int size, struct kmem_cache_args *args,
			 slab_flags_t flags);

void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);

static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
	return (s->flags & SLAB_KMALLOC);
}

static inline bool is_kmalloc_normal(struct kmem_cache *s)
{
	if (!is_kmalloc_cache(s))
		return false;
	return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
}

#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
			 SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			 SLAB_TEMPORARY | SLAB_ACCOUNT | \
			 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)

#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)

#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS)

bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);

struct seq_file;
struct file;

struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
long validate_slab_cache(struct kmem_cache *s);
static inline bool __slub_debug_enabled(void)
{
	return static_branch_unlikely(&slub_debug_enabled);
}
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
static inline bool __slub_debug_enabled(void)
{
	return false;
}
#endif

/*
 * Returns true if any of the specified slab_debug flags is enabled for the
 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
 * the static key.
 */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	if (IS_ENABLED(CONFIG_SLUB_DEBUG))
		VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	if (__slub_debug_enabled())
		return s->flags & flags;
	return false;
}

#if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT)
bool slab_in_kunit_test(void);
#else
static inline bool slab_in_kunit_test(void) { return false; }
#endif

#ifdef CONFIG_SLAB_OBJ_EXT

/*
 * slab_obj_exts - get the pointer to the slab object extension vector
 * associated with a slab.
 * @slab: a pointer to the slab struct
 *
 * Returns a pointer to the object extension vector associated with the slab,
 * or NULL if no such vector has been associated yet.
 */
static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	unsigned long obj_exts = READ_ONCE(slab->obj_exts);

#ifdef CONFIG_MEMCG
	VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS),
							slab_page(slab));
	VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
#endif
	return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}

int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
			gfp_t gfp, bool new_slab);

#else /* CONFIG_SLAB_OBJ_EXT */

static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	return NULL;
}

#endif /* CONFIG_SLAB_OBJ_EXT */

static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}

#ifdef CONFIG_MEMCG
bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t flags, size_t size, void **p);
void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
			    void **p, int objects, struct slabobj_ext *obj_exts);
#endif

void kvfree_rcu_cb(struct rcu_head *head);

size_t __ksize(const void *objp);

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;
#endif
	if (s->flags & SLAB_KASAN)
		return s->object_size;
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}

#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc)) {
		if (c->ctor)
			return false;
		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
			return flags & __GFP_ZERO;
		return true;
	}
	return flags & __GFP_ZERO;
}

static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				&init_on_free))
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
void debugfs_slab_release(struct kmem_cache *);
#else
static inline void debugfs_slab_release(struct kmem_cache *s) { }
#endif

#ifdef CONFIG_PRINTK
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
	void *kp_ptr;
	struct slab *kp_slab;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
	void *kp_free_stack[KS_ADDRS_COUNT];
};
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif

void __check_heap_object(const void *ptr, unsigned long n,
			 const struct slab *slab, bool to_user);

static inline bool slub_debug_orig_size(struct kmem_cache *s)
{
	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
			(s->flags & SLAB_KMALLOC));
}

#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif

#endif /* MM_SLAB_H */
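slab_want_init_on_alloc() above decides whether an allocation is pre-zeroed: with init_on_alloc enabled, caches with constructors are never zeroed (that would wipe the ctor's work), RCU-typesafe or poisoned caches only honour an explicit __GFP_ZERO, and everything else is zeroed. A standalone model of that decision, with illustrative flag values that are not the kernel's:

#include <stdbool.h>
#include <stdio.h>

#define MODEL_GFP_ZERO        0x1u	/* stand-in for __GFP_ZERO */
#define MODEL_TYPESAFE_BY_RCU 0x2u	/* stand-in for SLAB_TYPESAFE_BY_RCU */
#define MODEL_POISON          0x4u	/* stand-in for SLAB_POISON */

/* Mirrors the branch structure of slab_want_init_on_alloc(). */
static bool want_init_on_alloc(bool init_on_alloc, bool has_ctor,
			       unsigned int cache_flags, unsigned int gfp)
{
	if (init_on_alloc) {
		if (has_ctor)
			return false;
		if (cache_flags & (MODEL_TYPESAFE_BY_RCU | MODEL_POISON))
			return gfp & MODEL_GFP_ZERO;
		return true;
	}
	return gfp & MODEL_GFP_ZERO;
}

int main(void)
{
	printf("%d\n", want_init_on_alloc(true, false, 0, 0));            /* 1 */
	printf("%d\n", want_init_on_alloc(true, true, 0, 0));             /* 0 */
	printf("%d\n", want_init_on_alloc(true, false, MODEL_POISON, 0)); /* 0 */
	return 0;
}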
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/key/af_key.c	An implementation of PF_KEYv2 sockets.
 *
 * Authors:	Maxim Giryaev	<gem@asplinux.ru>
 *		David S. Miller	<davem@redhat.com>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org>
 *		Derek Atkins <derek@ihtfp.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/xfrm.h>
#include <net/sock.h>

#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))

static unsigned int pfkey_net_id __read_mostly;
struct netns_pfkey {
	/* List of all pfkey sockets. */
	struct hlist_head table;
	atomic_t socks_nr;
};
static DEFINE_MUTEX(pfkey_mutex);

#define DUMMY_MARK 0
static const struct xfrm_mark dummy_mark = {0, 0};
struct pfkey_sock {
	/* struct sock must be the first member of struct pfkey_sock */
	struct sock	sk;
	int		registered;
	int		promisc;

	struct {
		uint8_t		msg_version;
		uint32_t	msg_portid;
		int		(*dump)(struct pfkey_sock *sk);
		void		(*done)(struct pfkey_sock *sk);
		union {
			struct xfrm_policy_walk	policy;
			struct xfrm_state_walk	state;
		} u;
		struct sk_buff	*skb;
	} dump;
	struct mutex dump_lock;
};

static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
			       xfrm_address_t *saddr, xfrm_address_t *daddr,
			       u16 *family);

static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
{
	return (struct pfkey_sock *)sk;
}

static int pfkey_can_dump(const struct sock *sk)
{
	if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf)
		return 1;
	return 0;
}

static void pfkey_terminate_dump(struct pfkey_sock *pfk)
{
	if (pfk->dump.dump) {
		if (pfk->dump.skb) {
			kfree_skb(pfk->dump.skb);
			pfk->dump.skb = NULL;
		}
		pfk->dump.done(pfk);
		pfk->dump.dump = NULL;
		pfk->dump.done = NULL;
	}
}

static void pfkey_sock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);

	pfkey_terminate_dump(pfkey_sk(sk));
	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive pfkey socket: %p\n", sk);
		return;
	}

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));

	atomic_dec(&net_pfkey->socks_nr);
}

static const struct proto_ops pfkey_ops;

static void pfkey_insert(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);

	mutex_lock(&pfkey_mutex);
	sk_add_node_rcu(sk, &net_pfkey->table);
	mutex_unlock(&pfkey_mutex);
}

static void pfkey_remove(struct sock *sk)
{
	mutex_lock(&pfkey_mutex);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&pfkey_mutex);
}

static struct proto key_proto = {
	.name	  = "KEY",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct pfkey_sock),
};

static int pfkey_create(struct net *net, struct socket *sock, int protocol,
			int kern)
{
	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
	struct sock *sk;
	struct pfkey_sock *pfk;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;
	if (sock->type != SOCK_RAW)
		return -ESOCKTNOSUPPORT;
	if (protocol != PF_KEY_V2)
		return -EPROTONOSUPPORT;

	sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	pfk = pfkey_sk(sk);
	mutex_init(&pfk->dump_lock);

	sock->ops = &pfkey_ops;
	sock_init_data(sock, sk);

	sk->sk_family = PF_KEY;
	sk->sk_destruct = pfkey_sock_destruct;

	atomic_inc(&net_pfkey->socks_nr);

	pfkey_insert(sk);

	return 0;
}

static int pfkey_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	pfkey_remove(sk);

	sock_orphan(sk);
	sock->sk = NULL;
	skb_queue_purge(&sk->sk_write_queue);

	synchronize_rcu();
	sock_put(sk);

	return 0;
}

static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation,
			       struct sock *sk)
{
	int err = -ENOBUFS;

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		return err;

	skb = skb_clone(skb, allocation);

	if (skb) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk);
		err = 0;
	}
	return err;
}

/* Send SKB to all pfkey sockets matching selected criteria. */
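pfkey_create() above only accepts SOCK_RAW sockets with protocol PF_KEY_V2, and requires CAP_NET_ADMIN in the owning namespace. A minimal userspace check of those constraints (needs a kernel built with CONFIG_NET_KEY; run as root, otherwise the call fails with EPERM):

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/pfkeyv2.h>

int main(void)
{
	/* Exercises the type/protocol checks in pfkey_create(). */
	int fd = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);

	if (fd < 0) {
		perror("socket(PF_KEY, SOCK_RAW, PF_KEY_V2)");
		return 1;
	}
	puts("PF_KEY socket created");
	close(fd);
	return 0;
}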
#define BROADCAST_ALL		0
#define BROADCAST_ONE		1
#define BROADCAST_REGISTERED	2
#define BROADCAST_PROMISC_ONLY	4
static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
			   int broadcast_flags, struct sock *one_sk,
			   struct net *net)
{
	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
	struct sock *sk;
	int err = -ESRCH;

	/* XXX Do we need something like netlink_overrun?  I think
	 * XXX PF_KEY socket apps will not mind current behavior.
	 */
	if (!skb)
		return -ENOMEM;

	rcu_read_lock();
	sk_for_each_rcu(sk, &net_pfkey->table) {
		struct pfkey_sock *pfk = pfkey_sk(sk);
		int err2;

		/* Yes, it means that if you are meant to receive this
		 * pfkey message you receive it twice as a promiscuous
		 * socket.
		 */
		if (pfk->promisc)
			pfkey_broadcast_one(skb, GFP_ATOMIC, sk);

		/* the exact target will be processed later */
		if (sk == one_sk)
			continue;
		if (broadcast_flags != BROADCAST_ALL) {
			if (broadcast_flags & BROADCAST_PROMISC_ONLY)
				continue;
			if ((broadcast_flags & BROADCAST_REGISTERED) &&
			    !pfk->registered)
				continue;
			if (broadcast_flags & BROADCAST_ONE)
				continue;
		}

		err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);

		/* Error is cleared after successful sending to at least one
		 * registered KM */
		if ((broadcast_flags & BROADCAST_REGISTERED) && err)
			err = err2;
	}
	rcu_read_unlock();

	if (one_sk != NULL)
		err = pfkey_broadcast_one(skb, allocation, one_sk);

	kfree_skb(skb);
	return err;
}

static int pfkey_do_dump(struct pfkey_sock *pfk)
{
	struct sadb_msg *hdr;
	int rc;

	mutex_lock(&pfk->dump_lock);
	if (!pfk->dump.dump) {
		rc = 0;
		goto out;
	}

	rc = pfk->dump.dump(pfk);
	if (rc == -ENOBUFS) {
		rc = 0;
		goto out;
	}

	if (pfk->dump.skb) {
		if (!pfkey_can_dump(&pfk->sk)) {
			rc = 0;
			goto out;
		}

		hdr = (struct sadb_msg *) pfk->dump.skb->data;
		hdr->sadb_msg_seq = 0;
		hdr->sadb_msg_errno = rc;
		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
				&pfk->sk, sock_net(&pfk->sk));
		pfk->dump.skb = NULL;
	}

	pfkey_terminate_dump(pfk);

out:
	mutex_unlock(&pfk->dump_lock);
	return rc;
}

static inline void pfkey_hdr_dup(struct sadb_msg *new,
				 const struct sadb_msg *orig)
{
	*new = *orig;
}
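The per-socket skip logic inside pfkey_broadcast() above can be read as a single predicate: the exact target socket is deferred, promiscuous-only broadcasts reach nobody through this path, and BROADCAST_REGISTERED filters on registration. A standalone model of just that decision (the promiscuous duplicate and the final direct send to one_sk happen outside this predicate):

#include <stdbool.h>
#include <stdio.h>

#define BROADCAST_ALL		0
#define BROADCAST_ONE		1
#define BROADCAST_REGISTERED	2
#define BROADCAST_PROMISC_ONLY	4

/* Mirrors the continue-chain in pfkey_broadcast()'s socket loop. */
static bool deliver_to(int broadcast_flags, bool registered, bool is_one_sk)
{
	if (is_one_sk)		/* the exact target is processed later */
		return false;
	if (broadcast_flags != BROADCAST_ALL) {
		if (broadcast_flags & BROADCAST_PROMISC_ONLY)
			return false;
		if ((broadcast_flags & BROADCAST_REGISTERED) && !registered)
			return false;
		if (broadcast_flags & BROADCAST_ONE)
			return false;
	}
	return true;
}

int main(void)
{
	printf("%d\n", deliver_to(BROADCAST_ALL, false, false));	 /* 1 */
	printf("%d\n", deliver_to(BROADCAST_REGISTERED, false, false)); /* 0 */
	printf("%d\n", deliver_to(BROADCAST_REGISTERED, true, false));	 /* 1 */
	return 0;
}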
static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
{
	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
	struct sadb_msg *hdr;

	if (!skb)
		return -ENOBUFS;

	/* Woe be to the platform trying to support PFKEY yet
	 * having normal errnos outside the 1-255 range, inclusive.
	 */
	err = -err;
	if (err == ERESTARTSYS ||
	    err == ERESTARTNOHAND ||
	    err == ERESTARTNOINTR)
		err = EINTR;
	if (err >= 512)
		err = EINVAL;
	BUG_ON(err <= 0 || err >= 256);

	hdr = skb_put(skb, sizeof(struct sadb_msg));
	pfkey_hdr_dup(hdr, orig);
	hdr->sadb_msg_errno = (uint8_t) err;
	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
			     sizeof(uint64_t));

	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));

	return 0;
}
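pfkey_error() above has to squeeze a kernel errno into the one-byte sadb_msg_errno field, so the kernel-internal restart codes collapse to EINTR and anything at or above 512 becomes EINVAL before truncation. A standalone model; the ERESTART* values are kernel-internal and redefined here only for illustration:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Kernel-internal restart codes (not exposed by userspace errno.h). */
#define ERESTARTSYS	512
#define ERESTARTNOINTR	513
#define ERESTARTNOHAND	514

/* Mirrors the errno mapping in pfkey_error(): result stays in 1..255. */
static uint8_t sadb_errno(int err)
{
	err = -err;	/* kernel error returns arrive negated */
	if (err == ERESTARTSYS || err == ERESTARTNOHAND ||
	    err == ERESTARTNOINTR)
		err = EINTR;
	if (err >= 512)
		err = EINVAL;
	return (uint8_t)err;
}

int main(void)
{
	printf("%u %u %u\n",
	       sadb_errno(-ERESTARTSYS),	/* EINTR  (4)  */
	       sadb_errno(-EINVAL),		/* EINVAL (22) */
	       sadb_errno(-515));		/* EINVAL (22) */
	return 0;
}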
static const u8 sadb_ext_min_len[] = {
	[SADB_EXT_RESERVED]		= (u8) 0,
	[SADB_EXT_SA]			= (u8) sizeof(struct sadb_sa),
	[SADB_EXT_LIFETIME_CURRENT]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_LIFETIME_HARD]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_LIFETIME_SOFT]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_ADDRESS_SRC]		= (u8) sizeof(struct sadb_address),
	[SADB_EXT_ADDRESS_DST]		= (u8) sizeof(struct sadb_address),
	[SADB_EXT_ADDRESS_PROXY]	= (u8) sizeof(struct sadb_address),
	[SADB_EXT_KEY_AUTH]		= (u8) sizeof(struct sadb_key),
	[SADB_EXT_KEY_ENCRYPT]		= (u8) sizeof(struct sadb_key),
	[SADB_EXT_IDENTITY_SRC]		= (u8) sizeof(struct sadb_ident),
	[SADB_EXT_IDENTITY_DST]		= (u8) sizeof(struct sadb_ident),
	[SADB_EXT_SENSITIVITY]		= (u8) sizeof(struct sadb_sens),
	[SADB_EXT_PROPOSAL]		= (u8) sizeof(struct sadb_prop),
	[SADB_EXT_SUPPORTED_AUTH]	= (u8) sizeof(struct sadb_supported),
	[SADB_EXT_SUPPORTED_ENCRYPT]	= (u8) sizeof(struct sadb_supported),
	[SADB_EXT_SPIRANGE]		= (u8) sizeof(struct sadb_spirange),
	[SADB_X_EXT_KMPRIVATE]		= (u8) sizeof(struct sadb_x_kmprivate),
	[SADB_X_EXT_POLICY]		= (u8) sizeof(struct sadb_x_policy),
	[SADB_X_EXT_SA2]		= (u8) sizeof(struct sadb_x_sa2),
	[SADB_X_EXT_NAT_T_TYPE]		= (u8) sizeof(struct sadb_x_nat_t_type),
	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
	[SADB_X_EXT_KMADDRESS]		= (u8) sizeof(struct sadb_x_kmaddress),
	[SADB_X_EXT_FILTER]		= (u8) sizeof(struct sadb_x_filter),
};

/* Verify sadb_address_{len,prefixlen} against sa_family.  */
static int verify_address_len(const void *p)
{
	const struct sadb_address *sp = p;
	const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
	const struct sockaddr_in *sin;
#if IS_ENABLED(CONFIG_IPV6)
	const struct sockaddr_in6 *sin6;
#endif
	int len;

	if (sp->sadb_address_len <
	    DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
			 sizeof(uint64_t)))
		return -EINVAL;

	switch (addr->sa_family) {
	case AF_INET:
		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
		if (sp->sadb_address_len != len ||
		    sp->sadb_address_prefixlen > 32)
			return -EINVAL;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
		if (sp->sadb_address_len != len ||
		    sp->sadb_address_prefixlen > 128)
			return -EINVAL;
		break;
#endif
	default:
		/* It is a user using the kernel to keep track of security
		 * associations for another protocol, such as
		 * OSPF/RSVP/RIPV2/MIP.  It is the user's job to verify
		 * lengths.
		 *
		 * XXX Actually, association/policy database is not yet
		 * XXX able to cope with arbitrary sockaddr families.
		 * XXX When it can, remove this -EINVAL.  -DaveM
		 */
		return -EINVAL;
	}

	return 0;
}

static inline int sadb_key_len(const struct sadb_key *key)
{
	int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8);

	return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes,
			    sizeof(uint64_t));
}

static int verify_key_len(const void *p)
{
	const struct sadb_key *key = p;

	if (sadb_key_len(key) > key->sadb_key_len)
		return -EINVAL;

	return 0;
}

static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx)
{
	return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) +
			    sec_ctx->sadb_x_ctx_len,
			    sizeof(uint64_t));
}

static inline int verify_sec_ctx_len(const void *p)
{
	const struct sadb_x_sec_ctx *sec_ctx = p;
	int len = sec_ctx->sadb_x_ctx_len;

	if (len > PAGE_SIZE)
		return -EINVAL;

	len = pfkey_sec_ctx_len(sec_ctx);

	if (sec_ctx->sadb_x_sec_len != len)
		return -EINVAL;

	return 0;
}

static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx,
								     gfp_t gfp)
{
	struct xfrm_user_sec_ctx *uctx = NULL;
	int ctx_size = sec_ctx->sadb_x_ctx_len;

	uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp);

	if (!uctx)
		return NULL;

	uctx->len = pfkey_sec_ctx_len(sec_ctx);
	uctx->exttype = sec_ctx->sadb_x_sec_exttype;
	uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi;
	uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg;
	uctx->ctx_len = sec_ctx->sadb_x_ctx_len;
	memcpy(uctx + 1, sec_ctx + 1,
	       uctx->ctx_len);

	return uctx;
}

static int present_and_same_family(const struct sadb_address *src,
				   const struct sadb_address *dst)
{
	const struct sockaddr *s_addr, *d_addr;

	if (!src || !dst)
		return 0;

	s_addr = (const struct sockaddr *)(src + 1);
	d_addr = (const struct sockaddr *)(dst + 1);
	if (s_addr->sa_family != d_addr->sa_family)
		return 0;
	if (s_addr->sa_family != AF_INET
#if IS_ENABLED(CONFIG_IPV6)
	    && s_addr->sa_family != AF_INET6
#endif
		)
		return 0;

	return 1;
}

static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr,
			 void **ext_hdrs)
{
	const char *p = (char *) hdr;
	int len = skb->len;

	len -= sizeof(*hdr);
	p += sizeof(*hdr);
	while (len > 0) {
		const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
		uint16_t ext_type;
		int ext_len;

		if (len < sizeof(*ehdr))
			return -EINVAL;

		ext_len  = ehdr->sadb_ext_len;
		ext_len *= sizeof(uint64_t);
		ext_type = ehdr->sadb_ext_type;
		if (ext_len < sizeof(uint64_t) ||
		    ext_len > len ||
		    ext_type == SADB_EXT_RESERVED)
			return -EINVAL;

		if (ext_type <= SADB_EXT_MAX) {
			int min = (int) sadb_ext_min_len[ext_type];
			if (ext_len < min)
				return -EINVAL;
			if (ext_hdrs[ext_type-1] != NULL)
				return -EINVAL;
			switch (ext_type) {
			case SADB_EXT_ADDRESS_SRC:
			case SADB_EXT_ADDRESS_DST:
			case SADB_EXT_ADDRESS_PROXY:
			case SADB_X_EXT_NAT_T_OA:
				if (verify_address_len(p))
					return -EINVAL;
				break;
			case SADB_X_EXT_SEC_CTX:
				if (verify_sec_ctx_len(p))
					return -EINVAL;
				break;
			case SADB_EXT_KEY_AUTH:
			case SADB_EXT_KEY_ENCRYPT:
				if (verify_key_len(p))
					return -EINVAL;
				break;
			default:
				break;
			}
			ext_hdrs[ext_type-1] = (void *) p;
		}
		p   += ext_len;
		len -= ext_len;
	}

	return 0;
}

static uint16_t
pfkey_satype2proto(uint8_t satype)
{
	switch (satype) {
	case SADB_SATYPE_UNSPEC:
		return IPSEC_PROTO_ANY;
	case SADB_SATYPE_AH:
		return IPPROTO_AH;
	case SADB_SATYPE_ESP:
		return IPPROTO_ESP;
	case SADB_X_SATYPE_IPCOMP:
		return IPPROTO_COMP;
	default:
		return 0;
	}
	/* NOTREACHED */
}

static uint8_t
pfkey_proto2satype(uint16_t proto)
{
	switch (proto) {
	case IPPROTO_AH:
		return SADB_SATYPE_AH;
	case IPPROTO_ESP:
		return SADB_SATYPE_ESP;
	case IPPROTO_COMP:
		return SADB_X_SATYPE_IPCOMP;
	default:
		return 0;
	}
	/* NOTREACHED */
}
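All sadb_*_len fields in these extensions count 8-byte units, which is exactly what verify_address_len() and sadb_key_len() above compute with DIV_ROUND_UP against sizeof(uint64_t). A quick userspace check of the expected values, using the real linux/pfkeyv2.h structures (the 160-bit key size is just an example):

#include <stdint.h>
#include <stdio.h>
#include <netinet/in.h>
#include <linux/pfkeyv2.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Expected sadb_address_len for an AF_INET address extension:
	 * header (8 bytes) + sockaddr_in (16 bytes) = 24 bytes = 3 units. */
	size_t addr_len = DIV_ROUND_UP(sizeof(struct sadb_address) +
				       sizeof(struct sockaddr_in),
				       sizeof(uint64_t));
	/* sadb_key_len() rule for a 160-bit HMAC key: header (8 bytes) +
	 * 20 bytes of key material = 28 bytes -> 4 units. */
	size_t key_len = DIV_ROUND_UP(sizeof(struct sadb_key) +
				      DIV_ROUND_UP(160, 8),
				      sizeof(uint64_t));

	printf("AF_INET sadb_address_len = %zu units\n", addr_len); /* 3 */
	printf("160-bit sadb_key_len     = %zu units\n", key_len);  /* 4 */
	return 0;
}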
/* BTW, this scheme means that there is no way with PFKEY2 sockets to
 * say specifically 'just raw sockets' as we encode them as 255.
 */
static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
{
	return proto == IPSEC_PROTO_ANY ? 0 : proto;
}

static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
{
	return proto ? proto : IPSEC_PROTO_ANY;
}

static inline int pfkey_sockaddr_len(sa_family_t family)
{
	switch (family) {
	case AF_INET:
		return sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return sizeof(struct sockaddr_in6);
#endif
	}
	return 0;
}

static
int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
{
	switch (sa->sa_family) {
	case AF_INET:
		xaddr->a4 =
			((struct sockaddr_in *)sa)->sin_addr.s_addr;
		return AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		memcpy(xaddr->a6,
		       &((struct sockaddr_in6 *)sa)->sin6_addr,
		       sizeof(struct in6_addr));
		return AF_INET6;
#endif
	}
	return 0;
}

static
int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr,
			      xfrm_address_t *xaddr)
{
	return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1),
				      xaddr);
}

static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net,
						  const struct sadb_msg *hdr,
						  void * const *ext_hdrs)
{
	const struct sadb_sa *sa;
	const struct sadb_address *addr;
	uint16_t proto;
	unsigned short family;
	xfrm_address_t *xaddr;

	sa = ext_hdrs[SADB_EXT_SA - 1];
	if (sa == NULL)
		return NULL;

	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
	if (proto == 0)
		return NULL;

	/* sadb_address_len should be checked by caller */
	addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1];
	if (addr == NULL)
		return NULL;

	family = ((const struct sockaddr *)(addr + 1))->sa_family;
	switch (family) {
	case AF_INET:
		xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
		break;
#endif
	default:
		xaddr = NULL;
	}

	if (!xaddr)
		return NULL;

	return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi,
				 proto, family);
}

#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))

static int
pfkey_sockaddr_size(sa_family_t family)
{
	return PFKEY_ALIGN8(pfkey_sockaddr_len(family));
}

static inline int pfkey_mode_from_xfrm(int mode)
{
	switch(mode) {
	case XFRM_MODE_TRANSPORT:
		return IPSEC_MODE_TRANSPORT;
	case XFRM_MODE_TUNNEL:
		return IPSEC_MODE_TUNNEL;
	case XFRM_MODE_BEET:
		return IPSEC_MODE_BEET;
	default:
		return -1;
	}
}

static inline int pfkey_mode_to_xfrm(int mode)
{
	switch(mode) {
	case IPSEC_MODE_ANY:	/*XXX*/
	case IPSEC_MODE_TRANSPORT:
		return XFRM_MODE_TRANSPORT;
	case IPSEC_MODE_TUNNEL:
		return XFRM_MODE_TUNNEL;
	case IPSEC_MODE_BEET:
		return XFRM_MODE_BEET;
	default:
		return -1;
	}
}

static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port,
					struct sockaddr *sa,
					unsigned short family)
{
	switch (family) {
	case AF_INET:
	    {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		sin->sin_family = AF_INET;
		sin->sin_port = port;
		sin->sin_addr.s_addr = xaddr->a4;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
		return 32;
	    }
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	    {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = port;
		sin6->sin6_flowinfo = 0;
		sin6->sin6_addr = xaddr->in6;
		sin6->sin6_scope_id = 0;
		return 128;
	    }
#endif
	}
	return 0;
}
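PFKEY_ALIGN8() above rounds a byte count up to the next multiple of 8, the unit all PF_KEY extension lengths are measured in: subtract one, set the low three bits, add one. A tiny standalone demo of the macro:

#include <stdio.h>

/* Same expression as the kernel macro above. */
#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))

int main(void)
{
	printf("%d %d %d\n",
	       PFKEY_ALIGN8(1),		/* 8  */
	       PFKEY_ALIGN8(16),	/* 16 */
	       PFKEY_ALIGN8(17));	/* 24 */
	return 0;
}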
static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
					      int add_keys, int hsc)
{
	struct sk_buff *skb;
	struct sadb_msg *hdr;
	struct sadb_sa *sa;
	struct sadb_lifetime *lifetime;
	struct sadb_address *addr;
	struct sadb_key *key;
	struct sadb_x_sa2 *sa2;
	struct sadb_x_sec_ctx *sec_ctx;
	struct xfrm_sec_ctx *xfrm_ctx;
	int ctx_size = 0;
	int size;
	int auth_key_size = 0;
	int encrypt_key_size = 0;
	int sockaddr_size;
	struct xfrm_encap_tmpl *natt = NULL;
	int mode;

	/* address family check */
	sockaddr_size = pfkey_sockaddr_size(x->props.family);
	if (!sockaddr_size)
		return ERR_PTR(-EINVAL);

	/* base, SA, (lifetime (HSC),) address(SD), (address(P),)
	   key(AE), (identity(SD),) (sensitivity) */
	size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) +
		sizeof(struct sadb_lifetime) +
		((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) +
		((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) +
			sizeof(struct sadb_address)*2 +
				sockaddr_size*2 +
					sizeof(struct sadb_x_sa2);

	if ((xfrm_ctx = x->security)) {
		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
		size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
	}

	/* identity & sensitivity */
	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family))
		size += sizeof(struct sadb_address) + sockaddr_size;

	if (add_keys) {
		if (x->aalg && x->aalg->alg_key_len) {
			auth_key_size =
				PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8);
			size += sizeof(struct sadb_key) + auth_key_size;
		}
		if (x->ealg && x->ealg->alg_key_len) {
			encrypt_key_size =
				PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8);
			size += sizeof(struct sadb_key) + encrypt_key_size;
		}
	}
	if (x->encap)
		natt = x->encap;

	if (natt && natt->encap_type) {
		size += sizeof(struct sadb_x_nat_t_type);
		size += sizeof(struct sadb_x_nat_t_port);
		size += sizeof(struct sadb_x_nat_t_port);
	}

	skb = alloc_skb(size + 16, GFP_ATOMIC);
	if (skb == NULL)
		return ERR_PTR(-ENOBUFS);

	/* caller should fill header later */
	hdr = skb_put(skb, sizeof(struct sadb_msg));
	memset(hdr, 0, size);	/* XXX do we need this ? */
	hdr->sadb_msg_len = size / sizeof(uint64_t);

	/* sa */
	sa = skb_put(skb, sizeof(struct sadb_sa));
	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
	sa->sadb_sa_exttype = SADB_EXT_SA;
	sa->sadb_sa_spi = x->id.spi;
	sa->sadb_sa_replay = x->props.replay_window;
	switch (x->km.state) {
	case XFRM_STATE_VALID:
		sa->sadb_sa_state = x->km.dying ?
			SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
		break;
	case XFRM_STATE_ACQ:
		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
		break;
	default:
		sa->sadb_sa_state = SADB_SASTATE_DEAD;
		break;
	}
	sa->sadb_sa_auth = 0;
	if (x->aalg) {
		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);

		sa->sadb_sa_auth = (a && a->pfkey_supported) ?
				a->desc.sadb_alg_id : 0;
	}
	sa->sadb_sa_encrypt = 0;
	BUG_ON(x->ealg && x->calg);
	if (x->ealg) {
		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0);

		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
				a->desc.sadb_alg_id : 0;
	}
	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
	if (x->calg) {
		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0);

		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; if (x->props.flags & XFRM_STATE_NOECN) sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; if (x->props.flags & XFRM_STATE_DECAP_DSCP) sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; if (x->props.flags & XFRM_STATE_NOPMTUDISC) sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC; /* hard time */ if (hsc & 2) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds; } /* soft time */ if (hsc & 1) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds; } /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = x->curlft.packets; lifetime->sadb_lifetime_bytes = x->curlft.bytes; lifetime->sadb_lifetime_addtime = x->curlft.add_time; lifetime->sadb_lifetime_usetime = x->curlft.use_time; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; /* "if the ports are non-zero, then the sadb_address_proto field, normally zero, MUST be filled in with the transport protocol's number." 
- RFC2367 */ addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) { addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; addr->sadb_address_proto = pfkey_proto_from_xfrm(x->sel.proto); addr->sadb_address_prefixlen = x->sel.prefixlen_s; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport, (struct sockaddr *) (addr + 1), x->props.family); } /* auth key */ if (add_keys && auth_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_AUTH; key->sadb_key_bits = x->aalg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8); } /* encrypt key */ if (add_keys && encrypt_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + encrypt_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT; key->sadb_key_bits = x->ealg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->ealg->alg_key, (x->ealg->alg_key_len+7)/8); } /* sa */ sa2 = skb_put(skb, sizeof(struct sadb_x_sa2)); sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t); sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2; if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) { kfree_skb(skb); return ERR_PTR(-EINVAL); } sa2->sadb_x_sa2_mode = mode; sa2->sadb_x_sa2_reserved1 = 0; sa2->sadb_x_sa2_reserved2 = 0; sa2->sadb_x_sa2_sequence = 0; sa2->sadb_x_sa2_reqid = x->props.reqid; if (natt && natt->encap_type) { struct sadb_x_nat_t_type *n_type; struct sadb_x_nat_t_port *n_port; /* type */ n_type = skb_put(skb, sizeof(*n_type)); n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t); n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; n_type->sadb_x_nat_t_type_type = natt->encap_type; n_type->sadb_x_nat_t_type_reserved[0] = 0; n_type->sadb_x_nat_t_type_reserved[1] = 0; n_type->sadb_x_nat_t_type_reserved[2] = 0; /* source port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* dest port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = natt->encap_dport; n_port->sadb_x_nat_t_port_reserved = 0; } /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct 
sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x) { struct sk_buff *skb; skb = __pfkey_xfrm_state2msg(x, 1, 3); return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x, int hsc) { return __pfkey_xfrm_state2msg(x, 0, hsc); } static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct xfrm_state *x; const struct sadb_lifetime *lifetime; const struct sadb_sa *sa; const struct sadb_key *key; const struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; sa = ext_hdrs[SADB_EXT_SA - 1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_ESP && !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_AH && !ext_hdrs[SADB_EXT_KEY_AUTH-1]) return ERR_PTR(-EINVAL); if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] != !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) return ERR_PTR(-EINVAL); proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return ERR_PTR(-EINVAL); /* default error is no buffer space */ err = -ENOBUFS; /* RFC2367: Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message. SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state. Therefore, the sadb_sa_state field of all submitted SAs MUST be SADB_SASTATE_MATURE and the kernel MUST return an error if this is not true. However, KAME setkey always uses SADB_SASTATE_LARVAL. Hence, we have to _ignore_ sadb_sa_state, which is also reasonable. 
*/ if (sa->sadb_sa_auth > SADB_AALG_MAX || (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP && sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key != NULL && sa->sadb_sa_encrypt != SADB_EALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) x->props.flags |= XFRM_STATE_DECAP_DSCP; if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; err = security_xfrm_state_alloc(x, uctx); kfree(uctx); if (err) goto out; } err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); if (!x->aalg) { err = -ENOMEM; goto out; } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { x->aalg->alg_key_len = key->sadb_key_bits; memcpy(x->aalg->alg_key, key+1, keysize); } x->aalg->alg_trunc_len = a->uinfo.auth.icv_truncbits; x->props.aalgo = sa->sadb_sa_auth; /* x->algo.flags = sa->sadb_sa_flags; */ } if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); if (!x->calg) { err = -ENOMEM; goto out; } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); if (!x->ealg) { err = -ENOMEM; goto out; } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { x->ealg->alg_key_len = key->sadb_key_bits; memcpy(x->ealg->alg_key, key+1, keysize); } x->props.ealgo = sa->sadb_sa_encrypt; x->geniv = a->uinfo.encr.geniv; } } /* 
x->algo.flags = sa->sadb_sa_flags; */

	x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
						    &x->props.saddr);
	pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
				  &x->id.daddr);

	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
		const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1];
		int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
		if (mode < 0) {
			err = -EINVAL;
			goto out;
		}
		x->props.mode = mode;
		x->props.reqid = sa2->sadb_x_sa2_reqid;
	}

	if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
		const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];

		/* Nobody uses this, but we try. */
		x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
		x->sel.prefixlen_s = addr->sadb_address_prefixlen;
	}

	if (!x->sel.family)
		x->sel.family = x->props.family;

	if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
		const struct sadb_x_nat_t_type* n_type;
		struct xfrm_encap_tmpl *natt;

		x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL);
		if (!x->encap) {
			err = -ENOMEM;
			goto out;
		}

		natt = x->encap;
		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
		natt->encap_type = n_type->sadb_x_nat_t_type_type;

		if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
			const struct sadb_x_nat_t_port *n_port =
				ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
			natt->encap_sport = n_port->sadb_x_nat_t_port_port;
		}
		if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
			const struct sadb_x_nat_t_port *n_port =
				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
		}
	}

	err = xfrm_init_state(x);
	if (err)
		goto out;

	x->km.seq = hdr->sadb_msg_seq;

	return x;

out:
	x->km.state = XFRM_STATE_DEAD;
	xfrm_state_put(x);
	return ERR_PTR(err);
}

static int pfkey_reserved(struct sock *sk, struct sk_buff *skb,
			  const struct sadb_msg *hdr,
			  void * const *ext_hdrs)
{
	return -EOPNOTSUPP;
}

static int pfkey_getspi(struct sock *sk, struct sk_buff *skb,
			const struct sadb_msg *hdr,
			void * const *ext_hdrs)
{
	struct net *net = sock_net(sk);
	struct sk_buff *resp_skb;
	struct sadb_x_sa2 *sa2;
	struct sadb_address *saddr, *daddr;
	struct sadb_msg *out_hdr;
	struct sadb_spirange *range;
	struct xfrm_state *x = NULL;
	int mode;
	int err;
	u32 min_spi, max_spi;
	u32 reqid;
	u8 proto;
	unsigned short family;
	xfrm_address_t *xsaddr = NULL, *xdaddr = NULL;

	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
		return -EINVAL;

	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
	if (proto == 0)
		return -EINVAL;

	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
		mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
		if (mode < 0)
			return -EINVAL;

		reqid = sa2->sadb_x_sa2_reqid;
	} else {
		mode = 0;
		reqid = 0;
	}

	saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
	daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1];

	family = ((struct sockaddr *)(saddr + 1))->sa_family;
	switch (family) {
	case AF_INET:
		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
		break;
#endif
	}

	if (hdr->sadb_msg_seq) {
		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
			xfrm_state_put(x);
			x = NULL;
		}
	}

	if (!x)
		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
				  proto, xdaddr, xsaddr, 1, family);

	if (x == NULL)
		return -ENOENT;

	min_spi = 0x100;
	max_spi = 0x0fffffff;
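	/*
	 * Default SPI range 0x100..0x0fffffff, staying clear of the low
	 * SPI values (1..255) that are reserved. A SADB_EXT_SPIRANGE
	 * extension supplied by the caller overrides these bounds below.
	 */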
range = ext_hdrs[SADB_EXT_SPIRANGE-1]; if (range) { min_spi = range->sadb_spirange_min; max_spi = range->sadb_spirange_max; } err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL); if (err) { xfrm_state_put(x); return err; } err = xfrm_alloc_spi(x, min_spi, max_spi, NULL); resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { xfrm_state_put(x); return PTR_ERR(resp_skb); } out_hdr = (struct sadb_msg *) resp_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GETSPI; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; xfrm_state_put(x); pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8) return -EOPNOTSUPP; if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX); if (x == NULL) return 0; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) x->km.state = XFRM_STATE_ERROR; spin_unlock_bh(&x->lock); xfrm_state_put(x); return 0; } static inline int event2poltype(int event) { switch (event) { case XFRM_MSG_DELPOLICY: return SADB_X_SPDDELETE; case XFRM_MSG_NEWPOLICY: return SADB_X_SPDADD; case XFRM_MSG_UPDPOLICY: return SADB_X_SPDUPDATE; case XFRM_MSG_POLEXPIRE: // return SADB_X_SPDEXPIRE; default: pr_err("pfkey: Unknown policy event %d\n", event); break; } return 0; } static inline int event2keytype(int event) { switch (event) { case XFRM_MSG_DELSA: return SADB_DELETE; case XFRM_MSG_NEWSA: return SADB_ADD; case XFRM_MSG_UPDSA: return SADB_UPDATE; case XFRM_MSG_EXPIRE: return SADB_EXPIRE; default: pr_err("pfkey: Unknown SA event %d\n", event); break; } return 0; } /* ADD/UPD/DEL */ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = pfkey_xfrm_state2msg(x); if (IS_ERR(skb)) return PTR_ERR(skb); hdr = (struct sadb_msg *) skb->data; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = event2keytype(c->event); hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); xfrm_state_hold(x); if (hdr->sadb_msg_type == SADB_ADD) err = xfrm_state_add(x); else err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 
0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); goto out; } if (hdr->sadb_msg_type == SADB_ADD) c.event = XFRM_MSG_NEWSA; else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); return err; } static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; if ((err = security_xfrm_state_delete(x))) goto out; if (xfrm_state_kern(x)) { err = -EPERM; goto out; } err = xfrm_state_delete(x); if (err < 0) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; struct xfrm_state *x; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GET; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; int len, auth_len, enc_len, i; auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); } len = enc_len + auth_len + sizeof(struct sadb_msg); skb = alloc_skb(len + 16, allocation); if (!skb) goto out_put_algs; hdr = skb_put(skb, sizeof(*hdr)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = 0; hdr->sadb_msg_len = len / sizeof(uint64_t); if (auth_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, auth_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = auth_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; for (i = 0; ; i++) { struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg->available) *ap++ = aalg->desc; } } if (enc_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, enc_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = enc_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; for (i = 0; ; i++) { struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if 
(ealg->available) *ap++ = ealg->desc; } } out_put_algs: return skb; } static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; if (hdr->sadb_msg_satype > SADB_SATYPE_MAX) return -EINVAL; if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) { if (pfk->registered&(1<<hdr->sadb_msg_satype)) return -EEXIST; pfk->registered |= (1<<hdr->sadb_msg_satype); } mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); mutex_unlock(&pfkey_mutex); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); return -ENOBUFS; } pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg)); hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int proto; struct km_event c; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; err = xfrm_state_flush(net, proto, true, false); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ err = 0; return err ? 
err : err2; } c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); return 0; } static int dump_sa(struct xfrm_state *x, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_DUMP; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sa(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); return -ENOMEM; } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr, sizeof(xfrm_address_t)); filter->family = xfilter->sadb_x_filter_family; filter->splen = xfilter->sadb_x_filter_splen; filter->dplen = xfilter->sadb_x_filter_dplen; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } if (reset_errno && skb_cloned(skb)) skb = skb_copy(skb, GFP_KERNEL); else skb = skb_clone(skb, GFP_KERNEL); if (reset_errno && skb) { struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; new_hdr->sadb_msg_errno = 0; } pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) { int i; u32 reqid = *(u32*)ptr; for (i=0; i<xp->xfrm_nr; i++) { if (xp->xfrm_vec[i].reqid == reqid) return -EEXIST; } return 0; } static u32 
gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; do { ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; } static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol, struct sadb_x_ipsecrequest *rq) { struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) return -ELOOP; if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) return -EINVAL; t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) { if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) && pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND) return -EINVAL; t->optional = 1; } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { int err; err = parse_sockaddr_pair( (struct sockaddr *)(rq + 1), rq->sadb_x_ipsecrequest_len - sizeof(*rq), &t->saddr, &t->id.daddr, &t->encap_family); if (err) return err; } else t->encap_family = xp->family; /* No way to set this via kame pfkey */ t->allalgs = 1; xp->xfrm_nr++; return 0; } static int parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) { int err; int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; while (len >= sizeof(*rq)) { if (len < rq->sadb_x_ipsecrequest_len || rq->sadb_x_ipsecrequest_len < sizeof(*rq)) return -EINVAL; if ((err = parse_ipsecrequest(xp, pol, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len); } return 0; } static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; if (xfrm_ctx) { int len = sizeof(struct sadb_x_sec_ctx); len += xfrm_ctx->ctx_len; return PFKEY_ALIGN8(len); } return 0; } static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + (sizeof(struct sadb_lifetime) * 3) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) + (socklen * 2) + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; size = pfkey_xfrm_policy2msg_size(xp); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); return skb; } static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_lifetime *lifetime; struct 
sadb_x_policy *pol;
	struct sadb_x_sec_ctx *sec_ctx;
	struct xfrm_sec_ctx *xfrm_ctx;
	int i;
	int size;
	int sockaddr_size = pfkey_sockaddr_size(xp->family);
	int socklen = pfkey_sockaddr_len(xp->family);

	size = pfkey_xfrm_policy2msg_size(xp);

	/* caller should fill header later */
	hdr = skb_put(skb, sizeof(struct sadb_msg));
	memset(hdr, 0, size);	/* XXX do we need this ? */

	/* src address */
	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
	addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/
		sizeof(uint64_t);
	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
	addr->sadb_address_prefixlen = xp->selector.prefixlen_s;
	addr->sadb_address_reserved = 0;
	if (!pfkey_sockaddr_fill(&xp->selector.saddr,
				 xp->selector.sport,
				 (struct sockaddr *) (addr + 1),
				 xp->family))
		BUG();

	/* dst address */
	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
	addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/
		sizeof(uint64_t);
	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
	addr->sadb_address_prefixlen = xp->selector.prefixlen_d;
	addr->sadb_address_reserved = 0;
	pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport,
			    (struct sockaddr *) (addr + 1),
			    xp->family);

	/* hard time */
	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
	lifetime->sadb_lifetime_len =
		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
	lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.hard_packet_limit);
	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit);
	lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds;
	lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds;
	/* soft time */
	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
	lifetime->sadb_lifetime_len =
		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
	lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.soft_packet_limit);
	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit);
	lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds;
	lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds;
	/* current time */
	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
	lifetime->sadb_lifetime_len =
		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
	lifetime->sadb_lifetime_allocations = xp->curlft.packets;
	lifetime->sadb_lifetime_bytes = xp->curlft.bytes;
	lifetime->sadb_lifetime_addtime = xp->curlft.add_time;
	lifetime->sadb_lifetime_usetime = xp->curlft.use_time;

	pol = skb_put(skb, sizeof(struct sadb_x_policy));
	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
	pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD;
	if (xp->action == XFRM_POLICY_ALLOW) {
		if (xp->xfrm_nr)
			pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
		else
			pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
	}
	pol->sadb_x_policy_dir = dir+1;
	pol->sadb_x_policy_reserved = 0;
	pol->sadb_x_policy_id = xp->index;
	pol->sadb_x_policy_priority = xp->priority;

	for (i=0; i<xp->xfrm_nr; i++) {
		const struct xfrm_tmpl *t = xp->xfrm_vec + i;
		struct sadb_x_ipsecrequest *rq;
		int req_size;
		int mode;

		req_size = sizeof(struct sadb_x_ipsecrequest);
		if (t->mode == XFRM_MODE_TUNNEL) {
			socklen = pfkey_sockaddr_len(t->encap_family);
			req_size += socklen * 2;
		}
else { size -= 2*socklen; } rq = skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); rq->sadb_x_ipsecrequest_len = req_size; rq->sadb_x_ipsecrequest_proto = t->id.proto; if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0) return -EINVAL; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE; if (t->reqid) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE; if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; if (t->mode == XFRM_MODE_TUNNEL) { u8 *sa = (void *)(rq + 1); pfkey_sockaddr_fill(&t->saddr, 0, (struct sockaddr *)sa, t->encap_family); pfkey_sockaddr_fill(&t->id.daddr, 0, (struct sockaddr *) (sa + socklen), t->encap_family); } } /* security context */ if ((xfrm_ctx = xp->security)) { int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); sec_ctx = skb_put(skb, ctx_size); sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = refcount_read(&xp->refcnt); return 0; } static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; if (c->data.byid && c->event == XFRM_MSG_DELPOLICY) out_hdr->sadb_msg_type = SADB_X_SPDDELETE2; else out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC) return -EINVAL; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); xp->selector.family = xp->family; xp->selector.prefixlen_s = sa->sadb_address_prefixlen; xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; /* Amusing, we set this twice. 
KAME apps appear to set same value * in both addresses. */ xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) xp->selector.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; goto out; } err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) goto out; } xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) { xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) { xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (err = parse_ipsecrequests(xp, pol)) < 0) goto out; err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; else c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); return 0; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct xfrm_selector sel; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *pol_ctx = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; memset(&sel, 0, sizeof(sel)); sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, 
pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err);
	security_xfrm_policy_free(pol_ctx);
	if (xp == NULL)
		return -ENOENT;

	xfrm_audit_policy_delete(xp, err ? 0 : 1, true);

	if (err)
		goto out;

	c.seq = hdr->sadb_msg_seq;
	c.portid = hdr->sadb_msg_pid;
	c.data.byid = 0;
	c.event = XFRM_MSG_DELPOLICY;
	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);

out:
	xfrm_pol_put(xp);
	return err;
}

static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp,
			    const struct sadb_msg *hdr, int dir)
{
	int err;
	struct sk_buff *out_skb;
	struct sadb_msg *out_hdr;

	err = 0;
	out_skb = pfkey_xfrm_policy2msg_prep(xp);
	if (IS_ERR(out_skb)) {
		err = PTR_ERR(out_skb);
		goto out;
	}
	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
	if (err < 0) {
		kfree_skb(out_skb);
		goto out;
	}

	out_hdr = (struct sadb_msg *) out_skb->data;
	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
	out_hdr->sadb_msg_satype = 0;
	out_hdr->sadb_msg_errno = 0;
	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
	err = 0;

out:
	return err;
}

static int pfkey_sockaddr_pair_size(sa_family_t family)
{
	return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
}

static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
			       xfrm_address_t *saddr, xfrm_address_t *daddr,
			       u16 *family)
{
	int af, socklen;

	if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
		return -EINVAL;

	af = pfkey_sockaddr_extract(sa, saddr);
	if (!af)
		return -EINVAL;

	socklen = pfkey_sockaddr_len(af);
	if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen),
				   daddr) != af)
		return -EINVAL;

	*family = af;
	return 0;
}

#ifdef CONFIG_NET_KEY_MIGRATE
static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
				    struct xfrm_migrate *m)
{
	int err;
	struct sadb_x_ipsecrequest *rq2;
	int mode;

	if (len < sizeof(*rq1) ||
	    len < rq1->sadb_x_ipsecrequest_len ||
	    rq1->sadb_x_ipsecrequest_len < sizeof(*rq1))
		return -EINVAL;

	/* old endpoints */
	err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1),
				  rq1->sadb_x_ipsecrequest_len - sizeof(*rq1),
				  &m->old_saddr, &m->old_daddr,
				  &m->old_family);
	if (err)
		return err;

	rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len);
	len -= rq1->sadb_x_ipsecrequest_len;

	if (len <= sizeof(*rq2) ||
	    len < rq2->sadb_x_ipsecrequest_len ||
	    rq2->sadb_x_ipsecrequest_len < sizeof(*rq2))
		return -EINVAL;

	/* new endpoints */
	err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1),
				  rq2->sadb_x_ipsecrequest_len - sizeof(*rq2),
				  &m->new_saddr, &m->new_daddr,
				  &m->new_family);
	if (err)
		return err;

	if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto ||
	    rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode ||
	    rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid)
		return -EINVAL;

	m->proto = rq1->sadb_x_ipsecrequest_proto;
	if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0)
		return -EINVAL;
	m->mode = mode;
	m->reqid = rq1->sadb_x_ipsecrequest_reqid;

	return ((int)(rq1->sadb_x_ipsecrequest_len +
		      rq2->sadb_x_ipsecrequest_len));
}

static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
			 const struct sadb_msg *hdr,
			 void * const *ext_hdrs)
{
	int i, len, ret, err = -EINVAL;
	u8 dir;
	struct sadb_address *sa;
	struct sadb_x_kmaddress *kma;
	struct sadb_x_policy *pol;
	struct sadb_x_ipsecrequest *rq;
	struct xfrm_selector sel;
	struct xfrm_migrate m[XFRM_MAX_DEPTH];
	struct xfrm_kmaddress k;
	struct net *net = sock_net(sk);

	if
(!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } if (kma) { /* convert sadb_x_kmaddress to xfrm_kmaddress */ k.reserved = kma->sadb_x_kmaddress_reserved; ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), &k.local, &k.remote, &k.family); if (ret < 0) { err = ret; goto out; } } dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); /* set source address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); /* set destination address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); rq = (struct sadb_x_ipsecrequest *)(pol + 1); /* extract ipsecrequests */ i = 0; len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); while (len > 0 && i < XFRM_MAX_DEPTH) { ret = ipsecrequests_to_migrate(rq, len, &m[i]); if (ret < 0) { err = ret; goto out; } else { rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); len -= ret; i++; } } if (!i || len > 0) { err = -EINVAL; goto out; } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, kma ? &k : NULL, net, NULL, 0, NULL, NULL); out: return err; } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } #endif static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); if (dir >= XFRM_POLICY_MAX) return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; if (delete) { xfrm_audit_policy_delete(xp, err ? 
0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); } else { err = key_pol_get_resp(sk, xp, hdr, dir); } out: xfrm_pol_put(xp); return err; } static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sp(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { struct net *net = sock_net((struct sock *)pfk); xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb_out) return -ENOBUFS; hdr = skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ return 0; return err; } c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); return 0; } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs); static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, [SADB_UPDATE] = pfkey_add, [SADB_ADD] = pfkey_add, [SADB_DELETE] = pfkey_delete, [SADB_GET] = pfkey_get, [SADB_ACQUIRE] = pfkey_acquire, 
[SADB_REGISTER] = pfkey_register, [SADB_EXPIRE] = NULL, [SADB_FLUSH] = pfkey_flush, [SADB_DUMP] = pfkey_dump, [SADB_X_PROMISC] = pfkey_promisc, [SADB_X_PCHANGE] = NULL, [SADB_X_SPDUPDATE] = pfkey_spdadd, [SADB_X_SPDADD] = pfkey_spdadd, [SADB_X_SPDDELETE] = pfkey_spddelete, [SADB_X_SPDGET] = pfkey_spdget, [SADB_X_SPDACQUIRE] = NULL, [SADB_X_SPDDUMP] = pfkey_spddump, [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; /* Non-zero return value of pfkey_broadcast() does not always signal * an error and even on an actual error we may still want to process * the message so rather ignore the return value. */ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); if (!err) { err = -EOPNOTSUPP; if (pfkey_funcs[hdr->sadb_msg_type]) err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs); } return err; } static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) { struct sadb_msg *hdr = NULL; if (skb->len < sizeof(*hdr)) { *errp = -EMSGSIZE; } else { hdr = (struct sadb_msg *) skb->data; if (hdr->sadb_msg_version != PF_KEY_V2 || hdr->sadb_msg_reserved != 0 || (hdr->sadb_msg_type <= SADB_RESERVED || hdr->sadb_msg_type > SADB_MAX)) { hdr = NULL; *errp = -EINVAL; } else if (hdr->sadb_msg_len != (skb->len / sizeof(uint64_t)) || hdr->sadb_msg_len < (sizeof(struct sadb_msg) / sizeof(uint64_t))) { hdr = NULL; *errp = -EMSGSIZE; } else { *errp = 0; } } return hdr; } static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->aalgos) * 8) return 0; return (t->aalgos >> id) & 1; } static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->ealgos) * 8) return 0; return (t->ealgos >> id) & 1; } static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); } static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = skb_put_zero(skb, sizeof(struct sadb_comb)); 
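		/*
		 * Fill the fresh sadb_comb below: advertise this auth
		 * algorithm's id and key-size bounds plus default soft/hard
		 * lifetimes, and grow the enclosing proposal's length to
		 * cover the new entry.
		 */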
p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { struct sadb_comb *c; const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = skb_put(skb, sizeof(struct sadb_comb)); memset(c, 0, sizeof(*c)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_encrypt = ealg->desc.sadb_alg_id; c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits; c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int hard; int hsc; hard = c->data.hard; if (hard) hsc = 2; else hsc = 1; out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; out_hdr->sadb_msg_type = SADB_EXPIRE; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? 
xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: return key_notify_sa(x, c); case XFRM_MSG_FLUSHSA: return key_notify_sa_flush(c); case XFRM_MSG_NEWAE: /* not yet supported */ break; default: pr_err("pfkey: Unknown SA event %d\n", c->event); break; } return 0; } static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); case XFRM_MSG_DELPOLICY: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: if (c->data.type != XFRM_POLICY_TYPE_MAIN) break; return key_notify_policy_flush(c); default: pr_err("pfkey: Unknown policy event %d\n", c->event); break; } return 0; } static u32 get_acqseq(void) { u32 res; static atomic_t acqseq; do { res = atomic_inc_return(&acqseq); } while (!res); return res; } static bool pfkey_is_alive(const struct km_event *c) { struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id); struct sock *sk; bool is_alive = false; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { if (pfkey_sk(sk)->registered) { is_alive = true; break; } } rcu_read_unlock(); return is_alive; } static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; size = sizeof(struct sadb_msg) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_ACQUIRE; hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq = get_acqseq(); hdr->sadb_msg_pid = 0; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); 
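/* pfkey_sockaddr_fill() returns the address prefix length (32 for IPv4, 128 for IPv6) and 0 for an unknown family; the family was already validated via pfkey_sockaddr_size() above, so a zero return here can only be a bug. */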
if (!addr->sadb_address_prefixlen) BUG(); pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ alg_size = 0; if (x->id.proto == IPPROTO_AH) alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) alg_size = dump_esp_combs(skb, t); hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #endif default: *dir = -EINVAL; return NULL; } *dir = -EINVAL; if (len < sizeof(struct sadb_x_policy) || pol->sadb_x_policy_len*8 > len || pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS || (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; /* security context too */ if (len >= (pol->sadb_x_policy_len*8 + sizeof(struct sadb_x_sec_ctx))) { char *p = (char *)pol; struct xfrm_user_sec_ctx *uctx; p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) goto out; } *dir = pol->sadb_x_policy_dir-1; return xp; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? 
SADB_SATYPE_ESP : 0); struct xfrm_encap_tmpl *natt = NULL; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; if (!satype) return -EINVAL; if (!x->encap) return -EINVAL; natt = x->encap; /* Build an SADB_X_NAT_T_NEW_MAPPING message: * * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) | * ADDRESS_DST (new addr) | NAT_T_DPORT (new port) */ size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + (sizeof(struct sadb_x_nat_t_port) * 2); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING; hdr->sadb_msg_satype = satype; hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = 0; sa->sadb_sa_state = 0; sa->sadb_sa_auth = 0; sa->sadb_sa_encrypt = 0; sa->sadb_sa_flags = 0; /* ADDRESS_SRC (old addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* ADDRESS_DST (new addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(ipaddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, const struct xfrm_selector *sel) { struct sadb_address *addr; addr = skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; addr->sadb_address_proto = sel->proto; addr->sadb_address_reserved = 0; switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; pfkey_sockaddr_fill(&sel->saddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; pfkey_sockaddr_fill(&sel->daddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; default: return -EINVAL; } return 0; } 
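/* A KMADDRESS extension carries the KM's local and remote addresses back to back behind the header. Its length is counted in 64-bit words like most PF_KEYv2 extensions; sadb_x_ipsecrequest below is the odd one out, with a length field in bytes. */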
static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; int family = k->family; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = (sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(family)); kma = skb_put_zero(skb, size_req); kma->sadb_x_kmaddress_len = size_req / 8; kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; kma->sadb_x_kmaddress_reserved = k->reserved; sa = (u8 *)(kma + 1); if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) return -EINVAL; return 0; } static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(family); rq = skb_put_zero(skb, size_req); rq->sadb_x_ipsecrequest_len = size_req; rq->sadb_x_ipsecrequest_proto = proto; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; sa = (u8 *) (rq + 1); if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; return 0; } #endif #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { int i; int sasize_sel; int size = 0; int size_pol = 0; struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; if (k != NULL) { /* addresses for KM */ size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(k->family)); } /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) return -EINVAL; size += (sizeof(struct sadb_address) + sasize_sel) * 2; /* policy info */ size_pol += sizeof(struct sadb_x_policy); /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->old_family); /* new locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->new_family); } size += sizeof(struct sadb_msg) + size_pol; /* alloc buffer */ skb = alloc_skb(size, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_MIGRATE; hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); hdr->sadb_msg_len = size / 8; hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; /* Addresses to be used by KM for negotiation, if ext is available */ if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) goto err; /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); /* selector dst */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); /* policy information */ pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = size_pol / 8; pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; 
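/* PF_KEY policy directions are 1-based (IPSEC_DIR_INBOUND == 1) while XFRM directions count from 0, hence the +1 here; pfkey_compile_policy() undoes it with the matching -1. */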
pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old ipsecrequest */ int mode = pfkey_mode_from_xfrm(mp->mode); if (mode < 0) goto err; if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->old_family, &mp->old_saddr, &mp->old_daddr) < 0) goto err; /* new ipsecrequest */ if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->new_family, &mp->new_saddr, &mp->new_daddr) < 0) goto err; } /* broadcast migrate message to sockets */ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; err: kfree_skb(skb); return -EINVAL; } #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } #endif static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) goto out; err = -EMSGSIZE; if ((unsigned int)len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; skb = alloc_skb(len, GFP_KERNEL); if (skb == NULL) goto out; err = -EFAULT; if (memcpy_from_msg(skb_put(skb,len), msg, len)) goto out; hdr = pfkey_get_base_msg(skb, &err); if (!hdr) goto out; mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) err = 0; kfree_skb(skb); return err ? : len; } static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *skb; int copied, err; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; if (pfk->dump.dump != NULL && 3 * atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) pfkey_do_dump(pfk); out_free: skb_free_datagram(sk, skb); out: return err; } static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ .bind = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, /* Now the operations that really occur. 
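* PF_KEY sockets have no notion of names or connections, so all
* traffic goes through sendmsg()/recvmsg() on the raw socket.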
*/ .release = pfkey_release, .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, }; #ifdef CONFIG_PROC_FS static int pfkey_seq_show(struct seq_file *f, void *v) { struct sock *s = sk_entry(v); if (v == SEQ_START_TOKEN) seq_printf(f ,"sk RefCnt Rmem Wmem User Inode\n"); else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); rcu_read_lock(); return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) __releases(rcu) { rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { .start = pfkey_seq_start, .next = pfkey_seq_next, .stop = pfkey_seq_stop, .show = pfkey_seq_show, }; static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops, sizeof(struct seq_net_private)); if (e == NULL) return -ENOMEM; return 0; } static void __net_exit pfkey_exit_proc(struct net *net) { remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) { return 0; } static inline void pfkey_exit_proc(struct net *net) { } #endif static struct xfrm_mgr pfkeyv2_mgr = { .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, .migrate = pfkey_send_migrate, .is_alive = pfkey_is_alive, }; static int __net_init pfkey_net_init(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); int rv; INIT_HLIST_HEAD(&net_pfkey->table); atomic_set(&net_pfkey->socks_nr, 0); rv = pfkey_init_proc(net); return rv; } static void __net_exit pfkey_net_exit(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { .init = pfkey_net_init, .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), }; static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } static int __init ipsec_pfkey_init(void) { int err = proto_register(&key_proto, 0); if (err != 0) goto out; err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_pernet; xfrm_register_km(&pfkeyv2_mgr); out: return err; out_unregister_pernet: unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; } module_init(ipsec_pfkey_init); module_exit(ipsec_pfkey_exit); MODULE_DESCRIPTION("PF_KEY socket helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_KEY);
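/*
 * Illustrative user-space sketch (not part of the module): the smallest
 * useful PF_KEYv2 exchange is a bare SADB_FLUSH request.  All lengths
 * are in 64-bit words, replies are broadcast to every listening PF_KEY
 * socket, and opening the socket requires CAP_NET_ADMIN.  Assumes the
 * standard <linux/pfkeyv2.h> definitions; error handling omitted.
 *
 *	int fd = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
 *	struct sadb_msg hdr = {
 *		.sadb_msg_version = PF_KEY_V2,
 *		.sadb_msg_type    = SADB_FLUSH,
 *		.sadb_msg_satype  = SADB_SATYPE_UNSPEC,
 *		.sadb_msg_len     = sizeof(hdr) / sizeof(uint64_t),
 *		.sadb_msg_seq     = 1,
 *		.sadb_msg_pid     = getpid(),
 *	};
 *	write(fd, &hdr, sizeof(hdr));
 *	read(fd, &hdr, sizeof(hdr));
 *
 * The reply is the request echoed back with sadb_msg_errno filled in
 * (zero on success).
 */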
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID support for Linux * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2012 Jiri Kosina */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/unaligned.h> #include <asm/byteorder.h> #include <linux/input.h> #include <linux/wait.h> #include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/semaphore.h> #include <linux/hid.h> #include <linux/hiddev.h> #include <linux/hid-debug.h> #include <linux/hidraw.h> #include "hid-ids.h" /* * Version Information */ #define DRIVER_DESC "HID core driver" static int hid_ignore_special_drivers = 0; module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600); MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver"); /* * Convert a signed n-bit integer to signed 32-bit integer. */ static s32 snto32(__u32 value, unsigned int n) { if (!value || !n) return 0; if (n > 32) n = 32; return sign_extend32(value, n - 1); } /* * Convert a signed 32-bit integer to a signed n-bit integer. */ static u32 s32ton(__s32 value, unsigned int n) { s32 a = value >> (n - 1); if (a && a != -1) return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; return value & ((1 << n) - 1); } /* * Register a new report for a device. */ struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application) { struct hid_report_enum *report_enum = device->report_enum + type; struct hid_report *report; if (id >= HID_MAX_IDS) return NULL; if (report_enum->report_id_hash[id]) return report_enum->report_id_hash[id]; report = kzalloc(sizeof(struct hid_report), GFP_KERNEL); if (!report) return NULL; if (id != 0) report_enum->numbered = 1; report->id = id; report->type = type; report->size = 0; report->device = device; report->application = application; report_enum->report_id_hash[id] = report; list_add_tail(&report->list, &report_enum->report_list); INIT_LIST_HEAD(&report->field_entry_list); return report; } EXPORT_SYMBOL_GPL(hid_register_report); /* * Register a new field for this report. */ static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages) { struct hid_field *field; if (report->maxfield == HID_MAX_FIELDS) { hid_err(report->device, "too many fields in report\n"); return NULL; } field = kvzalloc((sizeof(struct hid_field) + usages * sizeof(struct hid_usage) + 3 * usages * sizeof(unsigned int)), GFP_KERNEL); if (!field) return NULL; field->index = report->maxfield++; report->field[field->index] = field; field->usage = (struct hid_usage *)(field + 1); field->value = (s32 *)(field->usage + usages); field->new_value = (s32 *)(field->value + usages); field->usages_priorities = (s32 *)(field->new_value + usages); field->report = report; return field; } /* * Open a collection.
The type/usage is pushed on the stack. */ static int open_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection; unsigned usage; int collection_index; usage = parser->local.usage[0]; if (parser->collection_stack_ptr == parser->collection_stack_size) { unsigned int *collection_stack; unsigned int new_size = parser->collection_stack_size + HID_COLLECTION_STACK_SIZE; collection_stack = krealloc(parser->collection_stack, new_size * sizeof(unsigned int), GFP_KERNEL); if (!collection_stack) return -ENOMEM; parser->collection_stack = collection_stack; parser->collection_stack_size = new_size; } if (parser->device->maxcollection == parser->device->collection_size) { collection = kmalloc( array3_size(sizeof(struct hid_collection), parser->device->collection_size, 2), GFP_KERNEL); if (collection == NULL) { hid_err(parser->device, "failed to reallocate collection array\n"); return -ENOMEM; } memcpy(collection, parser->device->collection, sizeof(struct hid_collection) * parser->device->collection_size); memset(collection + parser->device->collection_size, 0, sizeof(struct hid_collection) * parser->device->collection_size); kfree(parser->device->collection); parser->device->collection = collection; parser->device->collection_size *= 2; } parser->collection_stack[parser->collection_stack_ptr++] = parser->device->maxcollection; collection_index = parser->device->maxcollection++; collection = parser->device->collection + collection_index; collection->type = type; collection->usage = usage; collection->level = parser->collection_stack_ptr - 1; collection->parent_idx = (collection->level == 0) ? -1 : parser->collection_stack[collection->level - 1]; if (type == HID_COLLECTION_APPLICATION) parser->device->maxapplication++; return 0; } /* * Close a collection. */ static int close_collection(struct hid_parser *parser) { if (!parser->collection_stack_ptr) { hid_err(parser->device, "collection stack underflow\n"); return -EINVAL; } parser->collection_stack_ptr--; return 0; } /* * Climb up the stack, search for the specified collection type * and return the usage. */ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection = parser->device->collection; int n; for (n = parser->collection_stack_ptr - 1; n >= 0; n--) { unsigned index = parser->collection_stack[n]; if (collection[index].type == type) return collection[index].usage; } return 0; /* we know nothing about this usage type */ } /* * Concatenate usage which defines 16 bits or less with the * currently defined usage page to form a 32 bit usage */ static void complete_usage(struct hid_parser *parser, unsigned int index) { parser->local.usage[index] &= 0xFFFF; parser->local.usage[index] |= (parser->global.usage_page & 0xFFFF) << 16; } /* * Add a usage to the temporary parser table. */ static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size) { if (parser->local.usage_index >= HID_MAX_USAGES) { hid_err(parser->device, "usage index exceeded\n"); return -1; } parser->local.usage[parser->local.usage_index] = usage; /* * If Usage item only includes usage id, concatenate it with * currently defined usage page */ if (size <= 2) complete_usage(parser, parser->local.usage_index); parser->local.usage_size[parser->local.usage_index] = size; parser->local.collection_index[parser->local.usage_index] = parser->collection_stack_ptr ? 
parser->collection_stack[parser->collection_stack_ptr - 1] : 0; parser->local.usage_index++; return 0; } /* * Register a new field for this report. */ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsigned flags) { struct hid_report *report; struct hid_field *field; unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; unsigned int usages; unsigned int offset; unsigned int i; unsigned int application; application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION); report = hid_register_report(parser->device, report_type, parser->global.report_id, application); if (!report) { hid_err(parser->device, "hid_register_report failed\n"); return -1; } /* Handle both signed and unsigned cases properly */ if ((parser->global.logical_minimum < 0 && parser->global.logical_maximum < parser->global.logical_minimum) || (parser->global.logical_minimum >= 0 && (__u32)parser->global.logical_maximum < (__u32)parser->global.logical_minimum)) { dbg_hid("logical range invalid 0x%x 0x%x\n", parser->global.logical_minimum, parser->global.logical_maximum); return -1; } offset = report->size; report->size += parser->global.report_size * parser->global.report_count; if (parser->device->ll_driver->max_buffer_size) max_buffer_size = parser->device->ll_driver->max_buffer_size; /* Total size check: Allow for possible report index byte */ if (report->size > (max_buffer_size - 1) << 3) { hid_err(parser->device, "report is too long\n"); return -1; } if (!parser->local.usage_index) /* Ignore padding fields */ return 0; usages = max_t(unsigned, parser->local.usage_index, parser->global.report_count); field = hid_register_field(report, usages); if (!field) return 0; field->physical = hid_lookup_collection(parser, HID_COLLECTION_PHYSICAL); field->logical = hid_lookup_collection(parser, HID_COLLECTION_LOGICAL); field->application = application; for (i = 0; i < usages; i++) { unsigned j = i; /* Duplicate the last usage we parsed if we have excess values */ if (i >= parser->local.usage_index) j = parser->local.usage_index - 1; field->usage[i].hid = parser->local.usage[j]; field->usage[i].collection_index = parser->local.collection_index[j]; field->usage[i].usage_index = i; field->usage[i].resolution_multiplier = 1; } field->maxusage = usages; field->flags = flags; field->report_offset = offset; field->report_type = report_type; field->report_size = parser->global.report_size; field->report_count = parser->global.report_count; field->logical_minimum = parser->global.logical_minimum; field->logical_maximum = parser->global.logical_maximum; field->physical_minimum = parser->global.physical_minimum; field->physical_maximum = parser->global.physical_maximum; field->unit_exponent = parser->global.unit_exponent; field->unit = parser->global.unit; return 0; } /* * Read data value from item. */ static u32 item_udata(struct hid_item *item) { switch (item->size) { case 1: return item->data.u8; case 2: return item->data.u16; case 4: return item->data.u32; } return 0; } static s32 item_sdata(struct hid_item *item) { switch (item->size) { case 1: return item->data.s8; case 2: return item->data.s16; case 4: return item->data.s32; } return 0; } /* * Process a global item. 
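* Global items establish shared parser state (usage page, logical and
* physical limits, unit, report size/count/id) that persists across
* main items and can be saved and restored with Push/Pop.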
*/ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) { __s32 raw_value; switch (item->tag) { case HID_GLOBAL_ITEM_TAG_PUSH: if (parser->global_stack_ptr == HID_GLOBAL_STACK_SIZE) { hid_err(parser->device, "global environment stack overflow\n"); return -1; } memcpy(parser->global_stack + parser->global_stack_ptr++, &parser->global, sizeof(struct hid_global)); return 0; case HID_GLOBAL_ITEM_TAG_POP: if (!parser->global_stack_ptr) { hid_err(parser->device, "global environment stack underflow\n"); return -1; } memcpy(&parser->global, parser->global_stack + --parser->global_stack_ptr, sizeof(struct hid_global)); return 0; case HID_GLOBAL_ITEM_TAG_USAGE_PAGE: parser->global.usage_page = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_LOGICAL_MINIMUM: parser->global.logical_minimum = item_sdata(item); return 0; case HID_GLOBAL_ITEM_TAG_LOGICAL_MAXIMUM: if (parser->global.logical_minimum < 0) parser->global.logical_maximum = item_sdata(item); else parser->global.logical_maximum = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_PHYSICAL_MINIMUM: parser->global.physical_minimum = item_sdata(item); return 0; case HID_GLOBAL_ITEM_TAG_PHYSICAL_MAXIMUM: if (parser->global.physical_minimum < 0) parser->global.physical_maximum = item_sdata(item); else parser->global.physical_maximum = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_UNIT_EXPONENT: /* Many devices provide unit exponent as a two's complement * nibble due to the common misunderstanding of HID * specification 1.11, 6.2.2.7 Global Items. Attempt to handle * both this and the standard encoding. */ raw_value = item_sdata(item); if (!(raw_value & 0xfffffff0)) parser->global.unit_exponent = snto32(raw_value, 4); else parser->global.unit_exponent = raw_value; return 0; case HID_GLOBAL_ITEM_TAG_UNIT: parser->global.unit = item_udata(item); return 0; case HID_GLOBAL_ITEM_TAG_REPORT_SIZE: parser->global.report_size = item_udata(item); if (parser->global.report_size > 256) { hid_err(parser->device, "invalid report_size %d\n", parser->global.report_size); return -1; } return 0; case HID_GLOBAL_ITEM_TAG_REPORT_COUNT: parser->global.report_count = item_udata(item); if (parser->global.report_count > HID_MAX_USAGES) { hid_err(parser->device, "invalid report_count %d\n", parser->global.report_count); return -1; } return 0; case HID_GLOBAL_ITEM_TAG_REPORT_ID: parser->global.report_id = item_udata(item); if (parser->global.report_id == 0 || parser->global.report_id >= HID_MAX_IDS) { hid_err(parser->device, "report_id %u is invalid\n", parser->global.report_id); return -1; } return 0; default: hid_err(parser->device, "unknown global tag 0x%x\n", item->tag); return -1; } } /* * Process a local item. */ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) { __u32 data; unsigned n; __u32 count; data = item_udata(item); switch (item->tag) { case HID_LOCAL_ITEM_TAG_DELIMITER: if (data) { /* * We treat items before the first delimiter * as global to all usage sets (branch 0). * In the moment we process only these global * items and the first delimiter set. 
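* Concretely: usages named before any delimiter are always taken, the
* usages inside the first Delimiter(Open)...Delimiter(Close) pair are
* taken as well, and any further delimited sets before the next main
* item are dropped as unsupported alternatives.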
*/ if (parser->local.delimiter_depth != 0) { hid_err(parser->device, "nested delimiters\n"); return -1; } parser->local.delimiter_depth++; parser->local.delimiter_branch++; } else { if (parser->local.delimiter_depth < 1) { hid_err(parser->device, "bogus close delimiter\n"); return -1; } parser->local.delimiter_depth--; } return 0; case HID_LOCAL_ITEM_TAG_USAGE: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } return hid_add_usage(parser, data, item->size); case HID_LOCAL_ITEM_TAG_USAGE_MINIMUM: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } parser->local.usage_minimum = data; return 0; case HID_LOCAL_ITEM_TAG_USAGE_MAXIMUM: if (parser->local.delimiter_branch > 1) { dbg_hid("alternative usage ignored\n"); return 0; } count = data - parser->local.usage_minimum; if (count + parser->local.usage_index >= HID_MAX_USAGES) { /* * We do not warn if the name is not set, we are * actually pre-scanning the device. */ if (dev_name(&parser->device->dev)) hid_warn(parser->device, "ignoring exceeding usage max\n"); data = HID_MAX_USAGES - parser->local.usage_index + parser->local.usage_minimum - 1; if (data <= 0) { hid_err(parser->device, "no more usage index available\n"); return -1; } } for (n = parser->local.usage_minimum; n <= data; n++) if (hid_add_usage(parser, n, item->size)) { dbg_hid("hid_add_usage failed\n"); return -1; } return 0; default: dbg_hid("unknown local item tag 0x%x\n", item->tag); return 0; } return 0; } /* * Concatenate Usage Pages into Usages where relevant: * As per specification, 6.2.2.8: "When the parser encounters a main item it * concatenates the last declared Usage Page with a Usage to form a complete * usage value." */ static void hid_concatenate_last_usage_page(struct hid_parser *parser) { int i; unsigned int usage_page; unsigned int current_page; if (!parser->local.usage_index) return; usage_page = parser->global.usage_page; /* * Concatenate usage page again only if last declared Usage Page * has not been already used in previous usages concatenation */ for (i = parser->local.usage_index - 1; i >= 0; i--) { if (parser->local.usage_size[i] > 2) /* Ignore extended usages */ continue; current_page = parser->local.usage[i] >> 16; if (current_page == usage_page) break; complete_usage(parser, i); } } /* * Process a main item. */ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) { __u32 data; int ret; hid_concatenate_last_usage_page(parser); data = item_udata(item); switch (item->tag) { case HID_MAIN_ITEM_TAG_BEGIN_COLLECTION: ret = open_collection(parser, data & 0xff); break; case HID_MAIN_ITEM_TAG_END_COLLECTION: ret = close_collection(parser); break; case HID_MAIN_ITEM_TAG_INPUT: ret = hid_add_field(parser, HID_INPUT_REPORT, data); break; case HID_MAIN_ITEM_TAG_OUTPUT: ret = hid_add_field(parser, HID_OUTPUT_REPORT, data); break; case HID_MAIN_ITEM_TAG_FEATURE: ret = hid_add_field(parser, HID_FEATURE_REPORT, data); break; default: if (item->tag >= HID_MAIN_ITEM_TAG_RESERVED_MIN && item->tag <= HID_MAIN_ITEM_TAG_RESERVED_MAX) hid_warn(parser->device, "reserved main item tag 0x%x\n", item->tag); else hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag); ret = 0; } memset(&parser->local, 0, sizeof(parser->local)); /* Reset the local parser environment */ return ret; } /* * Process a reserved item. 
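* Reserved items are only logged and then skipped, so descriptors
* that use them still parse.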
*/ static int hid_parser_reserved(struct hid_parser *parser, struct hid_item *item) { dbg_hid("reserved item type, tag 0x%x\n", item->tag); return 0; } /* * Free a report and all registered fields. The field->usage and * field->value table's are allocated behind the field, so we need * only to free(field) itself. */ static void hid_free_report(struct hid_report *report) { unsigned n; kfree(report->field_entries); for (n = 0; n < report->maxfield; n++) kvfree(report->field[n]); kfree(report); } /* * Close report. This function returns the device * state to the point prior to hid_open_report(). */ static void hid_close_report(struct hid_device *device) { unsigned i, j; for (i = 0; i < HID_REPORT_TYPES; i++) { struct hid_report_enum *report_enum = device->report_enum + i; for (j = 0; j < HID_MAX_IDS; j++) { struct hid_report *report = report_enum->report_id_hash[j]; if (report) hid_free_report(report); } memset(report_enum, 0, sizeof(*report_enum)); INIT_LIST_HEAD(&report_enum->report_list); } /* * If the HID driver had a rdesc_fixup() callback, dev->rdesc * will be allocated by hid-core and needs to be freed. * Otherwise, it is either equal to dev_rdesc or bpf_rdesc, in * which cases it'll be freed later on device removal or destroy. */ if (device->rdesc != device->dev_rdesc && device->rdesc != device->bpf_rdesc) kfree(device->rdesc); device->rdesc = NULL; device->rsize = 0; kfree(device->collection); device->collection = NULL; device->collection_size = 0; device->maxcollection = 0; device->maxapplication = 0; device->status &= ~HID_STAT_PARSED; } static inline void hid_free_bpf_rdesc(struct hid_device *hdev) { /* bpf_rdesc is either equal to dev_rdesc or allocated by call_hid_bpf_rdesc_fixup() */ if (hdev->bpf_rdesc != hdev->dev_rdesc) kfree(hdev->bpf_rdesc); hdev->bpf_rdesc = NULL; } /* * Free a device structure, all reports, and all fields. */ void hiddev_free(struct kref *ref) { struct hid_device *hid = container_of(ref, struct hid_device, ref); hid_close_report(hid); hid_free_bpf_rdesc(hid); kfree(hid->dev_rdesc); kfree(hid); } static void hid_device_release(struct device *dev) { struct hid_device *hid = to_hid_device(dev); kref_put(&hid->ref, hiddev_free); } /* * Fetch a report description item from the data stream. We support long * items, though they are not used yet. 
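* Short items pack everything into a prefix byte: bits 0-1 give the
* data size (0, 1, 2 or 4 bytes), bits 2-3 the type and bits 4-7 the
* tag; tag 15 (HID_ITEM_TAG_LONG) switches to the long format, whose
* real size and tag follow in the next two bytes.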
*/ static const u8 *fetch_item(const __u8 *start, const __u8 *end, struct hid_item *item) { u8 b; if ((end - start) <= 0) return NULL; b = *start++; item->type = (b >> 2) & 3; item->tag = (b >> 4) & 15; if (item->tag == HID_ITEM_TAG_LONG) { item->format = HID_ITEM_FORMAT_LONG; if ((end - start) < 2) return NULL; item->size = *start++; item->tag = *start++; if ((end - start) < item->size) return NULL; item->data.longdata = start; start += item->size; return start; } item->format = HID_ITEM_FORMAT_SHORT; item->size = BIT(b & 3) >> 1; /* 0, 1, 2, 3 -> 0, 1, 2, 4 */ if (end - start < item->size) return NULL; switch (item->size) { case 0: break; case 1: item->data.u8 = *start; break; case 2: item->data.u16 = get_unaligned_le16(start); break; case 4: item->data.u32 = get_unaligned_le32(start); break; } return start + item->size; } static void hid_scan_input_usage(struct hid_parser *parser, u32 usage) { struct hid_device *hid = parser->device; if (usage == HID_DG_CONTACTID) hid->group = HID_GROUP_MULTITOUCH; } static void hid_scan_feature_usage(struct hid_parser *parser, u32 usage) { if (usage == 0xff0000c5 && parser->global.report_count == 256 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; if (usage == 0xff0000c6 && parser->global.report_count == 1 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; } static void hid_scan_collection(struct hid_parser *parser, unsigned type) { struct hid_device *hid = parser->device; int i; if (((parser->global.usage_page << 16) == HID_UP_SENSOR) && (type == HID_COLLECTION_PHYSICAL || type == HID_COLLECTION_APPLICATION)) hid->group = HID_GROUP_SENSOR_HUB; if (hid->vendor == USB_VENDOR_ID_MICROSOFT && hid->product == USB_DEVICE_ID_MS_POWER_COVER && hid->group == HID_GROUP_MULTITOUCH) hid->group = HID_GROUP_GENERIC; if ((parser->global.usage_page << 16) == HID_UP_GENDESK) for (i = 0; i < parser->local.usage_index; i++) if (parser->local.usage[i] == HID_GD_POINTER) parser->scan_flags |= HID_SCAN_FLAG_GD_POINTER; if ((parser->global.usage_page << 16) >= HID_UP_MSVENDOR) parser->scan_flags |= HID_SCAN_FLAG_VENDOR_SPECIFIC; if ((parser->global.usage_page << 16) == HID_UP_GOOGLEVENDOR) for (i = 0; i < parser->local.usage_index; i++) if (parser->local.usage[i] == (HID_UP_GOOGLEVENDOR | 0x0001)) parser->device->group = HID_GROUP_VIVALDI; } static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) { __u32 data; int i; hid_concatenate_last_usage_page(parser); data = item_udata(item); switch (item->tag) { case HID_MAIN_ITEM_TAG_BEGIN_COLLECTION: hid_scan_collection(parser, data & 0xff); break; case HID_MAIN_ITEM_TAG_END_COLLECTION: break; case HID_MAIN_ITEM_TAG_INPUT: /* ignore constant inputs, they will be ignored by hid-input */ if (data & HID_MAIN_ITEM_CONSTANT) break; for (i = 0; i < parser->local.usage_index; i++) hid_scan_input_usage(parser, parser->local.usage[i]); break; case HID_MAIN_ITEM_TAG_OUTPUT: break; case HID_MAIN_ITEM_TAG_FEATURE: for (i = 0; i < parser->local.usage_index; i++) hid_scan_feature_usage(parser, parser->local.usage[i]); break; } /* Reset the local parser environment */ memset(&parser->local, 0, sizeof(parser->local)); return 0; } /* * Scan a report descriptor before the device is added to the bus. * Sets device groups and other properties that determine what driver * to load. 
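* For example, a touch descriptor advertising HID_DG_CONTACTID lands in
* HID_GROUP_MULTITOUCH (upgraded to HID_GROUP_MULTITOUCH_WIN_8 when the
* Win8 certification feature report is seen), and sensor collections
* select HID_GROUP_SENSOR_HUB.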
*/ static int hid_scan_report(struct hid_device *hid) { struct hid_parser *parser; struct hid_item item; const __u8 *start = hid->dev_rdesc; const __u8 *end = start + hid->dev_rsize; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { hid_scan_main, hid_parser_global, hid_parser_local, hid_parser_reserved }; parser = vzalloc(sizeof(struct hid_parser)); if (!parser) return -ENOMEM; parser->device = hid; hid->group = HID_GROUP_GENERIC; /* * The parsing is simpler than the one in hid_open_report() as we should * be robust against hid errors. Those errors will be raised by * hid_open_report() anyway. */ while ((start = fetch_item(start, end, &item)) != NULL) dispatch_type[item.type](parser, &item); /* * Handle special flags set during scanning. */ if ((parser->scan_flags & HID_SCAN_FLAG_MT_WIN_8) && (hid->group == HID_GROUP_MULTITOUCH)) hid->group = HID_GROUP_MULTITOUCH_WIN_8; /* * Vendor specific handlings */ switch (hid->vendor) { case USB_VENDOR_ID_WACOM: hid->group = HID_GROUP_WACOM; break; case USB_VENDOR_ID_SYNAPTICS: if (hid->group == HID_GROUP_GENERIC) if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC) && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER)) /* * hid-rmi should take care of them, * not hid-generic */ hid->group = HID_GROUP_RMI; break; } kfree(parser->collection_stack); vfree(parser); return 0; } /** * hid_parse_report - parse device report * * @hid: hid device * @start: report start * @size: report size * * Allocate the device report as read by the bus driver. This function should * only be called from parse() in ll drivers. */ int hid_parse_report(struct hid_device *hid, const __u8 *start, unsigned size) { hid->dev_rdesc = kmemdup(start, size, GFP_KERNEL); if (!hid->dev_rdesc) return -ENOMEM; hid->dev_rsize = size; return 0; } EXPORT_SYMBOL_GPL(hid_parse_report); static const char * const hid_report_names[] = { "HID_INPUT_REPORT", "HID_OUTPUT_REPORT", "HID_FEATURE_REPORT", }; /** * hid_validate_values - validate existing device report's value indexes * * @hid: hid device * @type: which report type to examine * @id: which report ID to examine (0 for first) * @field_index: which report field to examine * @report_counts: expected number of values * * Validate the number of values in a given field of a given report, after * parsing. */ struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, unsigned int report_counts) { struct hid_report *report; if (type > HID_FEATURE_REPORT) { hid_err(hid, "invalid HID report type %u\n", type); return NULL; } if (id >= HID_MAX_IDS) { hid_err(hid, "invalid HID report id %u\n", id); return NULL; } /* * Explicitly not using hid_get_report() here since it depends on * ->numbered being checked, which may not always be the case when * drivers go to access report values. */ if (id == 0) { /* * Validating on id 0 means we should examine the first * report in the list. 
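 *
 * A typical caller of this helper is a force-feedback driver checking at
 * probe time that the report it will later write to has enough values; a
 * minimal sketch (the type, id, field index and count are hypothetical):
 *
 *	if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 4))
 *		return -ENODEV;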
*/ report = list_first_entry_or_null( &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; } if (!report) { hid_err(hid, "missing %s %u\n", hid_report_names[type], id); return NULL; } if (report->maxfield <= field_index) { hid_err(hid, "not enough fields in %s %u\n", hid_report_names[type], id); return NULL; } if (report->field[field_index]->report_count < report_counts) { hid_err(hid, "not enough values in %s %u field %u\n", hid_report_names[type], id, field_index); return NULL; } return report; } EXPORT_SYMBOL_GPL(hid_validate_values); static int hid_calculate_multiplier(struct hid_device *hid, struct hid_field *multiplier) { int m; __s32 v = *multiplier->value; __s32 lmin = multiplier->logical_minimum; __s32 lmax = multiplier->logical_maximum; __s32 pmin = multiplier->physical_minimum; __s32 pmax = multiplier->physical_maximum; /* * "Because OS implementations will generally divide the control's * reported count by the Effective Resolution Multiplier, designers * should take care not to establish a potential Effective * Resolution Multiplier of zero." * HID Usage Table, v1.12, Section 4.3.1, p31 */ if (lmax - lmin == 0) return 1; /* * Handling the unit exponent is left as an exercise to whoever * finds a device where that exponent is not 0. */ m = ((v - lmin)/(lmax - lmin) * (pmax - pmin) + pmin); if (unlikely(multiplier->unit_exponent != 0)) { hid_warn(hid, "unsupported Resolution Multiplier unit exponent %d\n", multiplier->unit_exponent); } /* There are no devices with an effective multiplier > 255 */ if (unlikely(m == 0 || m > 255 || m < -255)) { hid_warn(hid, "unsupported Resolution Multiplier %d\n", m); m = 1; } return m; } static void hid_apply_multiplier_to_field(struct hid_device *hid, struct hid_field *field, struct hid_collection *multiplier_collection, int effective_multiplier) { struct hid_collection *collection; struct hid_usage *usage; int i; /* * If multiplier_collection is NULL, the multiplier applies * to all fields in the report. * Otherwise, it is the Logical Collection the multiplier applies to * but our field may be in a subcollection of that collection. */ for (i = 0; i < field->maxusage; i++) { usage = &field->usage[i]; collection = &hid->collection[usage->collection_index]; while (collection->parent_idx != -1 && collection != multiplier_collection) collection = &hid->collection[collection->parent_idx]; if (collection->parent_idx != -1 || multiplier_collection == NULL) usage->resolution_multiplier = effective_multiplier; } } static void hid_apply_multiplier(struct hid_device *hid, struct hid_field *multiplier) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_field *field; struct hid_collection *multiplier_collection; int effective_multiplier; int i; /* * "The Resolution Multiplier control must be contained in the same * Logical Collection as the control(s) to which it is to be applied. * If no Resolution Multiplier is defined, then the Resolution * Multiplier defaults to 1. If more than one control exists in a * Logical Collection, the Resolution Multiplier is associated with * all controls in the collection. If no Logical Collection is * defined, the Resolution Multiplier is associated with all * controls in the report." * HID Usage Table, v1.12, Section 4.3.1, p30 * * Thus, search from the current collection upwards until we find a * logical collection. Then search all fields for that same parent * collection. Those are the fields the multiplier applies to. 
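 *
 * For the arithmetic in hid_calculate_multiplier() above, a worked
 * example with hypothetical values: a multiplier feature reporting
 * v = 1 over logical range 0..1 and physical range 1..8 gives
 *
 *	m = ((1 - 0) / (1 - 0)) * (8 - 1) + 1 = 8
 *
 * i.e. the device pre-multiplies deltas by 8. The division runs first,
 * in integer math, which is harmless for the usual 0..1 logical range.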
* * If we have more than one multiplier, it will overwrite the * applicable fields later. */ multiplier_collection = &hid->collection[multiplier->usage->collection_index]; while (multiplier_collection->parent_idx != -1 && multiplier_collection->type != HID_COLLECTION_LOGICAL) multiplier_collection = &hid->collection[multiplier_collection->parent_idx]; if (multiplier_collection->type != HID_COLLECTION_LOGICAL) multiplier_collection = NULL; effective_multiplier = hid_calculate_multiplier(hid, multiplier); rep_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { for (i = 0; i < rep->maxfield; i++) { field = rep->field[i]; hid_apply_multiplier_to_field(hid, field, multiplier_collection, effective_multiplier); } } } /* * hid_setup_resolution_multiplier - set up all resolution multipliers * * @device: hid device * * Search for all Resolution Multiplier Feature Reports and apply their * value to all matching Input items. This only updates the internal struct * fields. * * The Resolution Multiplier is applied by the hardware. If the multiplier * is anything other than 1, the hardware will send pre-multiplied events * so that the same physical interaction generates an accumulated * accumulated_value = value * * multiplier * This may be achieved by sending * - "value * multiplier" for each event, or * - "value" but "multiplier" times as frequently, or * - a combination of the above * The only guarantee is that the same physical interaction always generates * an accumulated 'value * multiplier'. * * This function must be called before any event processing and after * any SetRequest to the Resolution Multiplier. */ void hid_setup_resolution_multiplier(struct hid_device *hid) { struct hid_report_enum *rep_enum; struct hid_report *rep; struct hid_usage *usage; int i, j; rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) { for (i = 0; i < rep->maxfield; i++) { /* Ignore if report count is out of bounds. */ if (rep->field[i]->report_count < 1) continue; for (j = 0; j < rep->field[i]->maxusage; j++) { usage = &rep->field[i]->usage[j]; if (usage->hid == HID_GD_RESOLUTION_MULTIPLIER) hid_apply_multiplier(hid, rep->field[i]); } } } } EXPORT_SYMBOL_GPL(hid_setup_resolution_multiplier); /** * hid_open_report - open a driver-specific device report * * @device: hid device * * Parse a report description into a hid_device structure. Reports are * enumerated, fields are attached to these reports. * 0 returned on success, otherwise nonzero error value. * * This function (or the equivalent hid_parse() macro) should only be * called from probe() in drivers, before starting the device. */ int hid_open_report(struct hid_device *device) { struct hid_parser *parser; struct hid_item item; unsigned int size; const __u8 *start; const __u8 *end; const __u8 *next; int ret; int i; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { hid_parser_main, hid_parser_global, hid_parser_local, hid_parser_reserved }; if (WARN_ON(device->status & HID_STAT_PARSED)) return -EBUSY; start = device->bpf_rdesc; if (WARN_ON(!start)) return -ENODEV; size = device->bpf_rsize; if (device->driver->report_fixup) { /* * device->driver->report_fixup() needs to work * on a copy of our report descriptor so it can * change it. 
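 *
 * A driver-side report_fixup() usually patches bytes in place and
 * returns the (possibly unchanged) buffer. A hedged sketch; the offsets
 * and values here are purely hypothetical:
 *
 *	static const __u8 *my_report_fixup(struct hid_device *hdev,
 *					   __u8 *rdesc, unsigned int *rsize)
 *	{
 *		// 0x25 is a one-byte Logical Maximum item
 *		if (*rsize >= 42 && rdesc[40] == 0x25 && rdesc[41] == 0x01)
 *			rdesc[41] = 0x7f;
 *		return rdesc;
 *	}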
*/ __u8 *buf = kmemdup(start, size, GFP_KERNEL); if (buf == NULL) return -ENOMEM; start = device->driver->report_fixup(device, buf, &size); /* * The second kmemdup is required in case report_fixup() returns * static read-only memory, but we have no idea if that memory * needs to be cleaned up or not at the end. */ start = kmemdup(start, size, GFP_KERNEL); kfree(buf); if (start == NULL) return -ENOMEM; } device->rdesc = start; device->rsize = size; parser = vzalloc(sizeof(struct hid_parser)); if (!parser) { ret = -ENOMEM; goto alloc_err; } parser->device = device; end = start + size; device->collection = kcalloc(HID_DEFAULT_NUM_COLLECTIONS, sizeof(struct hid_collection), GFP_KERNEL); if (!device->collection) { ret = -ENOMEM; goto err; } device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; for (i = 0; i < HID_DEFAULT_NUM_COLLECTIONS; i++) device->collection[i].parent_idx = -1; ret = -EINVAL; while ((next = fetch_item(start, end, &item)) != NULL) { start = next; if (item.format != HID_ITEM_FORMAT_SHORT) { hid_err(device, "unexpected long global item\n"); goto err; } if (dispatch_type[item.type](parser, &item)) { hid_err(device, "item %u %u %u %u parsing failed\n", item.format, (unsigned)item.size, (unsigned)item.type, (unsigned)item.tag); goto err; } if (start == end) { if (parser->collection_stack_ptr) { hid_err(device, "unbalanced collection at end of report description\n"); goto err; } if (parser->local.delimiter_depth) { hid_err(device, "unbalanced delimiter at end of report description\n"); goto err; } /* * fetch initial values in case the device's * default multiplier isn't the recommended 1 */ hid_setup_resolution_multiplier(device); kfree(parser->collection_stack); vfree(parser); device->status |= HID_STAT_PARSED; return 0; } } hid_err(device, "item fetching failed at offset %u/%u\n", size - (unsigned int)(end - start), size); err: kfree(parser->collection_stack); alloc_err: vfree(parser); hid_close_report(device); return ret; } EXPORT_SYMBOL_GPL(hid_open_report); /* * Extract/implement a data field from/to a little endian report (bit array). * * Code sort-of follows HID spec: * http://www.usb.org/developers/hidpage/HID1_11.pdf * * While the USB HID spec allows unlimited length bit fields in "report * descriptors", most devices never use more than 16 bits. * One model of UPS is claimed to report "LINEV" as a 32-bit field. * Search linux-kernel and linux-usb-devel archives for "hid-core extract". */ static u32 __extract(u8 *report, unsigned offset, int n) { unsigned int idx = offset / 8; unsigned int bit_nr = 0; unsigned int bit_shift = offset % 8; int bits_to_copy = 8 - bit_shift; u32 value = 0; u32 mask = n < 32 ? (1U << n) - 1 : ~0U; while (n > 0) { value |= ((u32)report[idx] >> bit_shift) << bit_nr; n -= bits_to_copy; bit_nr += bits_to_copy; bits_to_copy = 8; bit_shift = 0; idx++; } return value & mask; } u32 hid_field_extract(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n) { if (n > 32) { hid_warn_once(hid, "%s() called with n (%d) > 32! (%s)\n", __func__, n, current->comm); n = 32; } return __extract(report, offset, n); } EXPORT_SYMBOL_GPL(hid_field_extract); /* * "implement" : set bits in a little endian bit stream. * Same concepts as "extract" (see comments above). * The data mangled in the bit stream remains in little endian * order the whole time. It makes more sense to talk about * endianness of register values by considering a register * a "cached" copy of the little endian bit stream.
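 *
 * Worked example for the extract side: with report bytes { 0xAB, 0xCD },
 * an 8-bit field at bit offset 4 straddles the byte boundary:
 *
 *	hid_field_extract(hid, report, 4, 8) == 0xDA
 *
 * (low nibble 0xA taken from 0xAB >> 4, high nibble 0xD from 0xCD);
 * implement() below writes the same layout back.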
*/ static void __implement(u8 *report, unsigned offset, int n, u32 value) { unsigned int idx = offset / 8; unsigned int bit_shift = offset % 8; int bits_to_set = 8 - bit_shift; while (n - bits_to_set >= 0) { report[idx] &= ~(0xff << bit_shift); report[idx] |= value << bit_shift; value >>= bits_to_set; n -= bits_to_set; bits_to_set = 8; bit_shift = 0; idx++; } /* last nibble */ if (n) { u8 bit_mask = ((1U << n) - 1); report[idx] &= ~(bit_mask << bit_shift); report[idx] |= value << bit_shift; } } static void implement(const struct hid_device *hid, u8 *report, unsigned offset, unsigned n, u32 value) { if (unlikely(n > 32)) { hid_warn(hid, "%s() called with n (%d) > 32! (%s)\n", __func__, n, current->comm); n = 32; } else if (n < 32) { u32 m = (1U << n) - 1; if (unlikely(value > m)) { hid_warn(hid, "%s() called with too large value %d (n: %d)! (%s)\n", __func__, value, n, current->comm); value &= m; } } __implement(report, offset, n, value); } /* * Search an array for a value. */ static int search(__s32 *array, __s32 value, unsigned n) { while (n--) { if (*array++ == value) return 0; } return -1; } /** * hid_match_report - check if driver's raw_event should be called * * @hid: hid device * @report: hid report to match against * * compare hid->driver->report_table->report_type to report->type */ static int hid_match_report(struct hid_device *hid, struct hid_report *report) { const struct hid_report_id *id = hid->driver->report_table; if (!id) /* NULL means all */ return 1; for (; id->report_type != HID_TERMINATOR; id++) if (id->report_type == HID_ANY_ID || id->report_type == report->type) return 1; return 0; } /** * hid_match_usage - check if driver's event should be called * * @hid: hid device * @usage: usage to match against * * compare hid->driver->usage_table->usage_{type,code} to * usage->usage_{type,code} */ static int hid_match_usage(struct hid_device *hid, struct hid_usage *usage) { const struct hid_usage_id *id = hid->driver->usage_table; if (!id) /* NULL means all */ return 1; for (; id->usage_type != HID_ANY_ID - 1; id++) if ((id->usage_hid == HID_ANY_ID || id->usage_hid == usage->hid) && (id->usage_type == HID_ANY_ID || id->usage_type == usage->type) && (id->usage_code == HID_ANY_ID || id->usage_code == usage->code)) return 1; return 0; } static void hid_process_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value, int interrupt) { struct hid_driver *hdrv = hid->driver; int ret; if (!list_empty(&hid->debug_list)) hid_dump_input(hid, usage, value); if (hdrv && hdrv->event && hid_match_usage(hid, usage)) { ret = hdrv->event(hid, field, usage, value); if (ret != 0) { if (ret < 0) hid_err(hid, "%s's event failed with %d\n", hdrv->name, ret); return; } } if (hid->claimed & HID_CLAIMED_INPUT) hidinput_hid_event(hid, field, usage, value); if (hid->claimed & HID_CLAIMED_HIDDEV && interrupt && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); } /* * Checks if the given value is valid within this field */ static inline int hid_array_value_is_valid(struct hid_field *field, __s32 value) { __s32 min = field->logical_minimum; /* * Value needs to be between logical min and max, and * (value - min) is used as an index in the usage array. * This array is of size field->maxusage */ return value >= min && value <= field->logical_maximum && value - min < field->maxusage; } /* * Fetch the field from the data. The field content is stored for next * report processing (we do differential reporting to the layer). 
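 *
 * For array fields, the differential pass in hid_input_array_field()
 * compares the stored and the freshly fetched value sets. E.g. if the
 * previous report carried values { 0x04, 0x05 } and the new one carries
 * { 0x05, 0x06 }:
 *
 *	0x04 absent from the new set -> release event (value 0)
 *	0x06 absent from the old set -> press event (value 1)
 *	0x05 present in both         -> no event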
*/ static void hid_input_fetch_field(struct hid_device *hid, struct hid_field *field, __u8 *data) { unsigned n; unsigned count = field->report_count; unsigned offset = field->report_offset; unsigned size = field->report_size; __s32 min = field->logical_minimum; __s32 *value; value = field->new_value; memset(value, 0, count * sizeof(__s32)); field->ignored = false; for (n = 0; n < count; n++) { value[n] = min < 0 ? snto32(hid_field_extract(hid, data, offset + n * size, size), size) : hid_field_extract(hid, data, offset + n * size, size); /* Ignore report if ErrorRollOver */ if (!(field->flags & HID_MAIN_ITEM_VARIABLE) && hid_array_value_is_valid(field, value[n]) && field->usage[value[n] - min].hid == HID_UP_KEYBOARD + 1) { field->ignored = true; return; } } } /* * Process a received variable field. */ static void hid_input_var_field(struct hid_device *hid, struct hid_field *field, int interrupt) { unsigned int count = field->report_count; __s32 *value = field->new_value; unsigned int n; for (n = 0; n < count; n++) hid_process_event(hid, field, &field->usage[n], value[n], interrupt); memcpy(field->value, value, count * sizeof(__s32)); } /* * Process a received array field. The field content is stored for * next report processing (we do differential reporting to the layer). */ static void hid_input_array_field(struct hid_device *hid, struct hid_field *field, int interrupt) { unsigned int n; unsigned int count = field->report_count; __s32 min = field->logical_minimum; __s32 *value; value = field->new_value; /* ErrorRollOver */ if (field->ignored) return; for (n = 0; n < count; n++) { if (hid_array_value_is_valid(field, field->value[n]) && search(value, field->value[n], count)) hid_process_event(hid, field, &field->usage[field->value[n] - min], 0, interrupt); if (hid_array_value_is_valid(field, value[n]) && search(field->value, value[n], count)) hid_process_event(hid, field, &field->usage[value[n] - min], 1, interrupt); } memcpy(field->value, value, count * sizeof(__s32)); } /* * Analyse a received report, and fetch the data from it. The field * content is stored for next report processing (we do differential * reporting to the layer). */ static void hid_process_report(struct hid_device *hid, struct hid_report *report, __u8 *data, int interrupt) { unsigned int a; struct hid_field_entry *entry; struct hid_field *field; /* first retrieve all incoming values in data */ for (a = 0; a < report->maxfield; a++) hid_input_fetch_field(hid, report->field[a], data); if (!list_empty(&report->field_entry_list)) { /* INPUT_REPORT, we have a priority list of fields */ list_for_each_entry(entry, &report->field_entry_list, list) { field = entry->field; if (field->flags & HID_MAIN_ITEM_VARIABLE) hid_process_event(hid, field, &field->usage[entry->index], field->new_value[entry->index], interrupt); else hid_input_array_field(hid, field, interrupt); } /* we need to do the memcpy at the end for var items */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) memcpy(field->value, field->new_value, field->report_count * sizeof(__s32)); } } else { /* FEATURE_REPORT, regular processing */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) hid_input_var_field(hid, field, interrupt); else hid_input_array_field(hid, field, interrupt); } } } /* * Insert a given usage_index in a field in the list * of processed usages in the report. * * The elements of lower priority score are processed * first. 
*/ static void __hid_insert_field_entry(struct hid_device *hid, struct hid_report *report, struct hid_field_entry *entry, struct hid_field *field, unsigned int usage_index) { struct hid_field_entry *next; entry->field = field; entry->index = usage_index; entry->priority = field->usages_priorities[usage_index]; /* insert the element at the correct position */ list_for_each_entry(next, &report->field_entry_list, list) { /* * the priority of our element is strictly higher * than the next one, insert it before */ if (entry->priority > next->priority) { list_add_tail(&entry->list, &next->list); return; } } /* lowest priority score: insert at the end */ list_add_tail(&entry->list, &report->field_entry_list); } static void hid_report_process_ordering(struct hid_device *hid, struct hid_report *report) { struct hid_field *field; struct hid_field_entry *entries; unsigned int a, u, usages; unsigned int count = 0; /* count the number of individual fields in the report */ for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) count += field->report_count; else count++; } /* allocate the memory to process the fields */ entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); if (!entries) return; report->field_entries = entries; /* * walk through all fields in the report and * store them by priority order in report->field_entry_list * * - Var elements are individualized (field + usage_index) * - Arrays are taken as one, we can not chose an order for them */ usages = 0; for (a = 0; a < report->maxfield; a++) { field = report->field[a]; if (field->flags & HID_MAIN_ITEM_VARIABLE) { for (u = 0; u < field->report_count; u++) { __hid_insert_field_entry(hid, report, &entries[usages], field, u); usages++; } } else { __hid_insert_field_entry(hid, report, &entries[usages], field, 0); usages++; } } } static void hid_process_ordering(struct hid_device *hid) { struct hid_report *report; struct hid_report_enum *report_enum = &hid->report_enum[HID_INPUT_REPORT]; list_for_each_entry(report, &report_enum->report_list, list) hid_report_process_ordering(hid, report); } /* * Output the field into the report. */ static void hid_output_field(const struct hid_device *hid, struct hid_field *field, __u8 *data) { unsigned count = field->report_count; unsigned offset = field->report_offset; unsigned size = field->report_size; unsigned n; for (n = 0; n < count; n++) { if (field->logical_minimum < 0) /* signed values */ implement(hid, data, offset + n * size, size, s32ton(field->value[n], size)); else /* unsigned values */ implement(hid, data, offset + n * size, size, field->value[n]); } } /* * Compute the size of a report. */ static size_t hid_compute_report_size(struct hid_report *report) { if (report->size) return ((report->size - 1) >> 3) + 1; return 0; } /* * Create a report. 'data' has to be allocated using * hid_alloc_report_buf() so that it has proper size. 
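 *
 * The usual driver-side sequence, as a sketch (the field value written
 * here is made up):
 *
 *	__u8 *buf = hid_alloc_report_buf(report, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	hid_set_field(report->field[0], 0, 1);
 *	hid_output_report(report, buf);
 *	hid_hw_output_report(hdev, buf, hid_report_len(report));
 *	kfree(buf);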
*/ void hid_output_report(struct hid_report *report, __u8 *data) { unsigned n; if (report->id > 0) *data++ = report->id; memset(data, 0, hid_compute_report_size(report)); for (n = 0; n < report->maxfield; n++) hid_output_field(report->device, report->field[n], data); } EXPORT_SYMBOL_GPL(hid_output_report); /* * Allocator for buffer that is going to be passed to hid_output_report() */ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) { /* * 7 extra bytes are necessary to achieve proper functionality * of implement() working on 8 byte chunks */ u32 len = hid_report_len(report) + 7; return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); /* * Set a field value. The report this field belongs to has to be * created and transferred to the device, to set this value in the * device. */ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { unsigned size; if (!field) return -1; size = field->report_size; hid_dump_input(field->report->device, field->usage + offset, value); if (offset >= field->report_count) { hid_err(field->report->device, "offset (%d) exceeds report_count (%d)\n", offset, field->report_count); return -1; } if (field->logical_minimum < 0) { if (value != snto32(s32ton(value, size), size)) { hid_err(field->report->device, "value %d is out of range\n", value); return -1; } } field->value[offset] = value; return 0; } EXPORT_SYMBOL_GPL(hid_set_field); struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, unsigned int application, unsigned int usage) { struct list_head *report_list = &hdev->report_enum[report_type].report_list; struct hid_report *report; int i, j; list_for_each_entry(report, report_list, list) { if (report->application != application) continue; for (i = 0; i < report->maxfield; i++) { struct hid_field *field = report->field[i]; for (j = 0; j < field->maxusage; j++) { if (field->usage[j].hid == usage) return field; } } } return NULL; } EXPORT_SYMBOL_GPL(hid_find_field); static struct hid_report *hid_get_report(struct hid_report_enum *report_enum, const u8 *data) { struct hid_report *report; unsigned int n = 0; /* Normally report number is 0 */ /* Device uses numbered reports, data[0] is report number */ if (report_enum->numbered) n = *data; report = report_enum->report_id_hash[n]; if (report == NULL) dbg_hid("undefined report_id %u received\n", n); return report; } /* * Implement a generic .request() callback, using .raw_request() * DO NOT USE in hid drivers directly, but through hid_hw_request instead. 
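 *
 * From a driver, the hid_hw_request() wrapper is used like this (the
 * report ID 5 is hypothetical); a GET_REPORT answer is fed back through
 * hid_input_report() and lands in the report's field values:
 *
 *	struct hid_report *report =
 *		hid->report_enum[HID_FEATURE_REPORT].report_id_hash[5];
 *
 *	if (report)
 *		hid_hw_request(hid, report, HID_REQ_GET_REPORT);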
*/ int __hid_request(struct hid_device *hid, struct hid_report *report, enum hid_class_request reqtype) { char *buf; int ret; u32 len; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) return -ENOMEM; len = hid_report_len(report); if (reqtype == HID_REQ_SET_REPORT) hid_output_report(report, buf); ret = hid->ll_driver->raw_request(hid, report->id, buf, len, report->type, reqtype); if (ret < 0) { dbg_hid("unable to complete request: %d\n", ret); goto out; } if (reqtype == HID_REQ_GET_REPORT) hid_input_report(hid, report->type, buf, ret, 0); ret = 0; out: kfree(buf); return ret; } EXPORT_SYMBOL_GPL(__hid_request); int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; int max_buffer_size = HID_MAX_BUFFER_SIZE; u32 rsize, csize = size; u8 *cdata = data; int ret = 0; report = hid_get_report(report_enum, data); if (!report) goto out; if (report_enum->numbered) { cdata++; csize--; } rsize = hid_compute_report_size(report); if (hid->ll_driver->max_buffer_size) max_buffer_size = hid->ll_driver->max_buffer_size; if (report_enum->numbered && rsize >= max_buffer_size) rsize = max_buffer_size - 1; else if (rsize > max_buffer_size) rsize = max_buffer_size; if (csize < rsize) { dbg_hid("report %d is too short, (%d < %d)\n", report->id, csize, rsize); memset(cdata + csize, 0, rsize - csize); } if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event) hid->hiddev_report_event(hid, report); if (hid->claimed & HID_CLAIMED_HIDRAW) { ret = hidraw_report_event(hid, data, size); if (ret) goto out; } if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) { hid_process_report(hid, report, cdata, interrupt); hdrv = hid->driver; if (hdrv && hdrv->report) hdrv->report(hid, report); } if (hid->claimed & HID_CLAIMED_INPUT) hidinput_report_event(hid, report); out: return ret; } EXPORT_SYMBOL_GPL(hid_report_raw_event); static int __hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, bool lock_already_taken) { struct hid_report_enum *report_enum; struct hid_driver *hdrv; struct hid_report *report; int ret = 0; if (!hid) return -ENODEV; ret = down_trylock(&hid->driver_input_lock); if (lock_already_taken && !ret) { up(&hid->driver_input_lock); return -EINVAL; } else if (!lock_already_taken && ret) { return -EBUSY; } if (!hid->driver) { ret = -ENODEV; goto unlock; } report_enum = hid->report_enum + type; hdrv = hid->driver; data = dispatch_hid_bpf_device_event(hid, type, data, &size, interrupt, source, from_bpf); if (IS_ERR(data)) { ret = PTR_ERR(data); goto unlock; } if (!size) { dbg_hid("empty report\n"); ret = -1; goto unlock; } /* Avoid unnecessary overhead if debugfs is disabled */ if (!list_empty(&hid->debug_list)) hid_dump_report(hid, type, data, size); report = hid_get_report(report_enum, data); if (!report) { ret = -1; goto unlock; } if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) { ret = hdrv->raw_event(hid, report, data, size); if (ret < 0) goto unlock; } ret = hid_report_raw_event(hid, type, data, size, interrupt); unlock: if (!lock_already_taken) up(&hid->driver_input_lock); return ret; } /** * hid_input_report - report data from lower layer (usb, bt...) 
* * @hid: hid device * @type: HID report type (HID_*_REPORT) * @data: report contents * @size: size of data parameter * @interrupt: distinguish between interrupt and control transfers * * This is data entry for lower layers. */ int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt) { return __hid_input_report(hid, type, data, size, interrupt, 0, false, /* from_bpf */ false /* lock_already_taken */); } EXPORT_SYMBOL_GPL(hid_input_report); bool hid_match_one_id(const struct hid_device *hdev, const struct hid_device_id *id) { return (id->bus == HID_BUS_ANY || id->bus == hdev->bus) && (id->group == HID_GROUP_ANY || id->group == hdev->group) && (id->vendor == HID_ANY_ID || id->vendor == hdev->vendor) && (id->product == HID_ANY_ID || id->product == hdev->product); } const struct hid_device_id *hid_match_id(const struct hid_device *hdev, const struct hid_device_id *id) { for (; id->bus; id++) if (hid_match_one_id(hdev, id)) return id; return NULL; } EXPORT_SYMBOL_GPL(hid_match_id); static const struct hid_device_id hid_hiddev_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS) }, { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS1) }, { } }; static bool hid_hiddev(struct hid_device *hdev) { return !!hid_match_id(hdev, hid_hiddev_list); } static ssize_t report_descriptor_read(struct file *filp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct hid_device *hdev = to_hid_device(dev); if (off >= hdev->rsize) return 0; if (off + count > hdev->rsize) count = hdev->rsize - off; memcpy(buf, hdev->rdesc + off, count); return count; } static ssize_t country_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); return sprintf(buf, "%02x\n", hdev->country & 0xff); } static const BIN_ATTR_RO(report_descriptor, HID_MAX_DESCRIPTOR_SIZE); static const DEVICE_ATTR_RO(country); int hid_connect(struct hid_device *hdev, unsigned int connect_mask) { static const char *types[] = { "Device", "Pointer", "Mouse", "Device", "Joystick", "Gamepad", "Keyboard", "Keypad", "Multi-Axis Controller" }; const char *type, *bus; char buf[64] = ""; unsigned int i; int len; int ret; ret = hid_bpf_connect_device(hdev); if (ret) return ret; if (hdev->quirks & HID_QUIRK_HIDDEV_FORCE) connect_mask |= (HID_CONNECT_HIDDEV_FORCE | HID_CONNECT_HIDDEV); if (hdev->quirks & HID_QUIRK_HIDINPUT_FORCE) connect_mask |= HID_CONNECT_HIDINPUT_FORCE; if (hdev->bus != BUS_USB) connect_mask &= ~HID_CONNECT_HIDDEV; if (hid_hiddev(hdev)) connect_mask |= HID_CONNECT_HIDDEV_FORCE; if ((connect_mask & HID_CONNECT_HIDINPUT) && !hidinput_connect(hdev, connect_mask & HID_CONNECT_HIDINPUT_FORCE)) hdev->claimed |= HID_CLAIMED_INPUT; if ((connect_mask & HID_CONNECT_HIDDEV) && hdev->hiddev_connect && !hdev->hiddev_connect(hdev, connect_mask & HID_CONNECT_HIDDEV_FORCE)) hdev->claimed |= HID_CLAIMED_HIDDEV; if ((connect_mask & HID_CONNECT_HIDRAW) && !hidraw_connect(hdev)) hdev->claimed |= HID_CLAIMED_HIDRAW; if (connect_mask & HID_CONNECT_DRIVER) hdev->claimed |= HID_CLAIMED_DRIVER; /* Drivers with the ->raw_event callback set are not required to connect * to any other listener. 
*/ if (!hdev->claimed && !hdev->driver->raw_event) { hid_err(hdev, "device has no listeners, quitting\n"); return -ENODEV; } hid_process_ordering(hdev); if ((hdev->claimed & HID_CLAIMED_INPUT) && (connect_mask & HID_CONNECT_FF) && hdev->ff_init) hdev->ff_init(hdev); len = 0; if (hdev->claimed & HID_CLAIMED_INPUT) len += sprintf(buf + len, "input"); if (hdev->claimed & HID_CLAIMED_HIDDEV) len += sprintf(buf + len, "%shiddev%d", len ? "," : "", ((struct hiddev *)hdev->hiddev)->minor); if (hdev->claimed & HID_CLAIMED_HIDRAW) len += sprintf(buf + len, "%shidraw%d", len ? "," : "", ((struct hidraw *)hdev->hidraw)->minor); type = "Device"; for (i = 0; i < hdev->maxcollection; i++) { struct hid_collection *col = &hdev->collection[i]; if (col->type == HID_COLLECTION_APPLICATION && (col->usage & HID_USAGE_PAGE) == HID_UP_GENDESK && (col->usage & 0xffff) < ARRAY_SIZE(types)) { type = types[col->usage & 0xffff]; break; } } switch (hdev->bus) { case BUS_USB: bus = "USB"; break; case BUS_BLUETOOTH: bus = "BLUETOOTH"; break; case BUS_I2C: bus = "I2C"; break; case BUS_VIRTUAL: bus = "VIRTUAL"; break; case BUS_INTEL_ISHTP: case BUS_AMD_SFH: bus = "SENSOR HUB"; break; default: bus = "<UNKNOWN>"; } ret = device_create_file(&hdev->dev, &dev_attr_country); if (ret) hid_warn(hdev, "can't create sysfs country code attribute err: %d\n", ret); hid_info(hdev, "%s: %s HID v%x.%02x %s [%s] on %s\n", buf, bus, hdev->version >> 8, hdev->version & 0xff, type, hdev->name, hdev->phys); return 0; } EXPORT_SYMBOL_GPL(hid_connect); void hid_disconnect(struct hid_device *hdev) { device_remove_file(&hdev->dev, &dev_attr_country); if (hdev->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(hdev); if (hdev->claimed & HID_CLAIMED_HIDDEV) hdev->hiddev_disconnect(hdev); if (hdev->claimed & HID_CLAIMED_HIDRAW) hidraw_disconnect(hdev); hdev->claimed = 0; hid_bpf_disconnect_device(hdev); } EXPORT_SYMBOL_GPL(hid_disconnect); /** * hid_hw_start - start underlying HW * @hdev: hid device * @connect_mask: which outputs to connect, see HID_CONNECT_* * * Call this in probe function *after* hid_parse. This will set up HW * buffers and start the device (if not deferred to device open). * hid_hw_stop must be called if this was successful. */ int hid_hw_start(struct hid_device *hdev, unsigned int connect_mask) { int error; error = hdev->ll_driver->start(hdev); if (error) return error; if (connect_mask) { error = hid_connect(hdev, connect_mask); if (error) { hdev->ll_driver->stop(hdev); return error; } } return 0; } EXPORT_SYMBOL_GPL(hid_hw_start); /** * hid_hw_stop - stop underlying HW * @hdev: hid device * * This is usually called from remove function or from probe when something * failed and hid_hw_start was called already. */ void hid_hw_stop(struct hid_device *hdev) { hid_disconnect(hdev); hdev->ll_driver->stop(hdev); } EXPORT_SYMBOL_GPL(hid_hw_stop); /** * hid_hw_open - signal underlying HW to start delivering events * @hdev: hid device * * Tell underlying HW to start delivering events from the device. * This function should be called sometime after successful call * to hid_hw_start().
*/ int hid_hw_open(struct hid_device *hdev) { int ret; ret = mutex_lock_killable(&hdev->ll_open_lock); if (ret) return ret; if (!hdev->ll_open_count++) { ret = hdev->ll_driver->open(hdev); if (ret) hdev->ll_open_count--; if (hdev->driver->on_hid_hw_open) hdev->driver->on_hid_hw_open(hdev); } mutex_unlock(&hdev->ll_open_lock); return ret; } EXPORT_SYMBOL_GPL(hid_hw_open); /** * hid_hw_close - signal underlying HW to stop delivering events * * @hdev: hid device * * This function indicates that we are not interested in the events * from this device anymore. Delivery of events may or may not stop, * depending on the number of users still outstanding. */ void hid_hw_close(struct hid_device *hdev) { mutex_lock(&hdev->ll_open_lock); if (!--hdev->ll_open_count) { hdev->ll_driver->close(hdev); if (hdev->driver->on_hid_hw_close) hdev->driver->on_hid_hw_close(hdev); } mutex_unlock(&hdev->ll_open_lock); } EXPORT_SYMBOL_GPL(hid_hw_close); /** * hid_hw_request - send report request to device * * @hdev: hid device * @report: report to send * @reqtype: hid request type */ void hid_hw_request(struct hid_device *hdev, struct hid_report *report, enum hid_class_request reqtype) { if (hdev->ll_driver->request) return hdev->ll_driver->request(hdev, report, reqtype); __hid_request(hdev, report, reqtype); } EXPORT_SYMBOL_GPL(hid_hw_request); int __hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype, u64 source, bool from_bpf) { unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; int ret; if (hdev->ll_driver->max_buffer_size) max_buffer_size = hdev->ll_driver->max_buffer_size; if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; ret = dispatch_hid_bpf_raw_requests(hdev, reportnum, buf, len, rtype, reqtype, source, from_bpf); if (ret) return ret; return hdev->ll_driver->raw_request(hdev, reportnum, buf, len, rtype, reqtype); } /** * hid_hw_raw_request - send report request to device * * @hdev: hid device * @reportnum: report ID * @buf: in/out data to transfer * @len: length of buf * @rtype: HID report type * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT * * Return: count of data transferred, negative if error * * Same behavior as hid_hw_request, but with raw buffers instead.
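 *
 * Sketch of a SET_REPORT through the raw interface (report ID 5 and the
 * payload byte are hypothetical; for numbered reports buf[0] must hold
 * the report ID):
 *
 *	__u8 buf[2] = { 0x05, 0x01 };
 *
 *	ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(buf),
 *				 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);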
*/ int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype) { return __hid_hw_raw_request(hdev, reportnum, buf, len, rtype, reqtype, 0, false); } EXPORT_SYMBOL_GPL(hid_hw_raw_request); int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, u64 source, bool from_bpf) { unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; int ret; if (hdev->ll_driver->max_buffer_size) max_buffer_size = hdev->ll_driver->max_buffer_size; if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; ret = dispatch_hid_bpf_output_report(hdev, buf, len, source, from_bpf); if (ret) return ret; if (hdev->ll_driver->output_report) return hdev->ll_driver->output_report(hdev, buf, len); return -ENOSYS; } /** * hid_hw_output_report - send output report to device * * @hdev: hid device * @buf: raw data to transfer * @len: length of buf * * Return: count of data transferred, negative if error */ int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len) { return __hid_hw_output_report(hdev, buf, len, 0, false); } EXPORT_SYMBOL_GPL(hid_hw_output_report); #ifdef CONFIG_PM int hid_driver_suspend(struct hid_device *hdev, pm_message_t state) { if (hdev->driver && hdev->driver->suspend) return hdev->driver->suspend(hdev, state); return 0; } EXPORT_SYMBOL_GPL(hid_driver_suspend); int hid_driver_reset_resume(struct hid_device *hdev) { if (hdev->driver && hdev->driver->reset_resume) return hdev->driver->reset_resume(hdev); return 0; } EXPORT_SYMBOL_GPL(hid_driver_reset_resume); int hid_driver_resume(struct hid_device *hdev) { if (hdev->driver && hdev->driver->resume) return hdev->driver->resume(hdev); return 0; } EXPORT_SYMBOL_GPL(hid_driver_resume); #endif /* CONFIG_PM */ struct hid_dynid { struct list_head list; struct hid_device_id id; }; /** * new_id_store - add a new HID device ID to this driver and re-probe devices * @drv: target device driver * @buf: buffer for scanning device ID data * @count: input size * * Adds a new dynamic hid device ID to this driver, * and causes the driver to probe for all devices again. */ static ssize_t new_id_store(struct device_driver *drv, const char *buf, size_t count) { struct hid_driver *hdrv = to_hid_driver(drv); struct hid_dynid *dynid; __u32 bus, vendor, product; unsigned long driver_data = 0; int ret; ret = sscanf(buf, "%x %x %x %lx", &bus, &vendor, &product, &driver_data); if (ret < 3) return -EINVAL; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if (!dynid) return -ENOMEM; dynid->id.bus = bus; dynid->id.group = HID_GROUP_ANY; dynid->id.vendor = vendor; dynid->id.product = product; dynid->id.driver_data = driver_data; spin_lock(&hdrv->dyn_lock); list_add_tail(&dynid->list, &hdrv->dyn_list); spin_unlock(&hdrv->dyn_lock); ret = driver_attach(&hdrv->driver); return ret ? 
: count; } static DRIVER_ATTR_WO(new_id); static struct attribute *hid_drv_attrs[] = { &driver_attr_new_id.attr, NULL, }; ATTRIBUTE_GROUPS(hid_drv); static void hid_free_dynids(struct hid_driver *hdrv) { struct hid_dynid *dynid, *n; spin_lock(&hdrv->dyn_lock); list_for_each_entry_safe(dynid, n, &hdrv->dyn_list, list) { list_del(&dynid->list); kfree(dynid); } spin_unlock(&hdrv->dyn_lock); } const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv) { struct hid_dynid *dynid; spin_lock(&hdrv->dyn_lock); list_for_each_entry(dynid, &hdrv->dyn_list, list) { if (hid_match_one_id(hdev, &dynid->id)) { spin_unlock(&hdrv->dyn_lock); return &dynid->id; } } spin_unlock(&hdrv->dyn_lock); return hid_match_id(hdev, hdrv->id_table); } EXPORT_SYMBOL_GPL(hid_match_device); static int hid_bus_match(struct device *dev, const struct device_driver *drv) { struct hid_driver *hdrv = to_hid_driver(drv); struct hid_device *hdev = to_hid_device(dev); return hid_match_device(hdev, hdrv) != NULL; } /** * hid_compare_device_paths - check if both devices share the same path * @hdev_a: hid device * @hdev_b: hid device * @separator: char to use as separator * * Check if two devices share the same path up to the last occurrence of * the separator char. Both paths must exist (i.e., zero-length paths * don't match). */ bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator) { int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys; int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys; if (n1 != n2 || n1 <= 0 || n2 <= 0) return false; return !strncmp(hdev_a->phys, hdev_b->phys, n1); } EXPORT_SYMBOL_GPL(hid_compare_device_paths); static bool hid_check_device_match(struct hid_device *hdev, struct hid_driver *hdrv, const struct hid_device_id **id) { *id = hid_match_device(hdev, hdrv); if (!*id) return false; if (hdrv->match) return hdrv->match(hdev, hid_ignore_special_drivers); /* * hid-generic implements .match(), so we must be dealing with a * different HID driver here, and can simply check if * hid_ignore_special_drivers or HID_QUIRK_IGNORE_SPECIAL_DRIVER * are set or not. */ return !hid_ignore_special_drivers && !(hdev->quirks & HID_QUIRK_IGNORE_SPECIAL_DRIVER); } static int __hid_device_probe(struct hid_device *hdev, struct hid_driver *hdrv) { const struct hid_device_id *id; int ret; if (!hdev->bpf_rsize) { /* in case a bpf program gets detached, we need to free the old one */ hid_free_bpf_rdesc(hdev); /* keep this around so we know we called it once */ hdev->bpf_rsize = hdev->dev_rsize; /* call_hid_bpf_rdesc_fixup will always return a valid pointer */ hdev->bpf_rdesc = call_hid_bpf_rdesc_fixup(hdev, hdev->dev_rdesc, &hdev->bpf_rsize); } if (!hid_check_device_match(hdev, hdrv, &id)) return -ENODEV; hdev->devres_group_id = devres_open_group(&hdev->dev, NULL, GFP_KERNEL); if (!hdev->devres_group_id) return -ENOMEM; /* reset the quirks that has been previously set */ hdev->quirks = hid_lookup_quirk(hdev); hdev->driver = hdrv; if (hdrv->probe) { ret = hdrv->probe(hdev, id); } else { /* default probe */ ret = hid_open_report(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); } /* * Note that we are not closing the devres group opened above so * even resources that were attached to the device after probe is * run are released when hid_device_remove() is executed. This is * needed as some drivers would allocate additional resources, * for example when updating firmware. 
*/ if (ret) { devres_release_group(&hdev->dev, hdev->devres_group_id); hid_close_report(hdev); hdev->driver = NULL; } return ret; } static int hid_device_probe(struct device *dev) { struct hid_device *hdev = to_hid_device(dev); struct hid_driver *hdrv = to_hid_driver(dev->driver); int ret = 0; if (down_interruptible(&hdev->driver_input_lock)) return -EINTR; hdev->io_started = false; clear_bit(ffs(HID_STAT_REPROBED), &hdev->status); if (!hdev->driver) ret = __hid_device_probe(hdev, hdrv); if (!hdev->io_started) up(&hdev->driver_input_lock); return ret; } static void hid_device_remove(struct device *dev) { struct hid_device *hdev = to_hid_device(dev); struct hid_driver *hdrv; down(&hdev->driver_input_lock); hdev->io_started = false; hdrv = hdev->driver; if (hdrv) { if (hdrv->remove) hdrv->remove(hdev); else /* default remove */ hid_hw_stop(hdev); /* Release all devres resources allocated by the driver */ devres_release_group(&hdev->dev, hdev->devres_group_id); hid_close_report(hdev); hdev->driver = NULL; } if (!hdev->io_started) up(&hdev->driver_input_lock); } static ssize_t modalias_show(struct device *dev, struct device_attribute *a, char *buf) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); return scnprintf(buf, PAGE_SIZE, "hid:b%04Xg%04Xv%08Xp%08X\n", hdev->bus, hdev->group, hdev->vendor, hdev->product); } static DEVICE_ATTR_RO(modalias); static struct attribute *hid_dev_attrs[] = { &dev_attr_modalias.attr, NULL, }; static const struct bin_attribute *hid_dev_bin_attrs[] = { &bin_attr_report_descriptor, NULL }; static const struct attribute_group hid_dev_group = { .attrs = hid_dev_attrs, .bin_attrs_new = hid_dev_bin_attrs, }; __ATTRIBUTE_GROUPS(hid_dev); static int hid_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct hid_device *hdev = to_hid_device(dev); if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X", hdev->bus, hdev->vendor, hdev->product)) return -ENOMEM; if (add_uevent_var(env, "HID_NAME=%s", hdev->name)) return -ENOMEM; if (add_uevent_var(env, "HID_PHYS=%s", hdev->phys)) return -ENOMEM; if (add_uevent_var(env, "HID_UNIQ=%s", hdev->uniq)) return -ENOMEM; if (add_uevent_var(env, "MODALIAS=hid:b%04Xg%04Xv%08Xp%08X", hdev->bus, hdev->group, hdev->vendor, hdev->product)) return -ENOMEM; return 0; } const struct bus_type hid_bus_type = { .name = "hid", .dev_groups = hid_dev_groups, .drv_groups = hid_drv_groups, .match = hid_bus_match, .probe = hid_device_probe, .remove = hid_device_remove, .uevent = hid_uevent, }; EXPORT_SYMBOL(hid_bus_type); int hid_add_device(struct hid_device *hdev) { static atomic_t id = ATOMIC_INIT(0); int ret; if (WARN_ON(hdev->status & HID_STAT_ADDED)) return -EBUSY; hdev->quirks = hid_lookup_quirk(hdev); /* we need to kill them here, otherwise they will stay allocated to * wait for coming driver */ if (hid_ignore(hdev)) return -ENODEV; /* * Check for the mandatory transport channel. */ if (!hdev->ll_driver->raw_request) { hid_err(hdev, "transport driver missing .raw_request()\n"); return -EINVAL; } /* * Read the device report descriptor once and use as template * for the driver-specific modifications. 
*/ ret = hdev->ll_driver->parse(hdev); if (ret) return ret; if (!hdev->dev_rdesc) return -ENODEV; /* * Scan generic devices for group information */ if (hid_ignore_special_drivers) { hdev->group = HID_GROUP_GENERIC; } else if (!hdev->group && !(hdev->quirks & HID_QUIRK_HAVE_SPECIAL_DRIVER)) { ret = hid_scan_report(hdev); if (ret) hid_warn(hdev, "bad device descriptor (%d)\n", ret); } hdev->id = atomic_inc_return(&id); /* XXX hack, any other cleaner solution after the driver core * is converted to allow more than 20 bytes as the device name? */ dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus, hdev->vendor, hdev->product, hdev->id); hid_debug_register(hdev, dev_name(&hdev->dev)); ret = device_add(&hdev->dev); if (!ret) hdev->status |= HID_STAT_ADDED; else hid_debug_unregister(hdev); return ret; } EXPORT_SYMBOL_GPL(hid_add_device); /** * hid_allocate_device - allocate new hid device descriptor * * Allocate and initialize hid device, so that hid_destroy_device might be * used to free it. * * New hid_device pointer is returned on success, otherwise ERR_PTR encoded * error value. */ struct hid_device *hid_allocate_device(void) { struct hid_device *hdev; int ret = -ENOMEM; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (hdev == NULL) return ERR_PTR(ret); device_initialize(&hdev->dev); hdev->dev.release = hid_device_release; hdev->dev.bus = &hid_bus_type; device_enable_async_suspend(&hdev->dev); hid_close_report(hdev); init_waitqueue_head(&hdev->debug_wait); INIT_LIST_HEAD(&hdev->debug_list); spin_lock_init(&hdev->debug_list_lock); sema_init(&hdev->driver_input_lock, 1); mutex_init(&hdev->ll_open_lock); kref_init(&hdev->ref); ret = hid_bpf_device_init(hdev); if (ret) goto out_err; return hdev; out_err: hid_destroy_device(hdev); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(hid_allocate_device); static void hid_remove_device(struct hid_device *hdev) { if (hdev->status & HID_STAT_ADDED) { device_del(&hdev->dev); hid_debug_unregister(hdev); hdev->status &= ~HID_STAT_ADDED; } hid_free_bpf_rdesc(hdev); kfree(hdev->dev_rdesc); hdev->dev_rdesc = NULL; hdev->dev_rsize = 0; hdev->bpf_rsize = 0; } /** * hid_destroy_device - free previously allocated device * * @hdev: hid device * * If you allocate hid_device through hid_allocate_device, you should only * ever free it with this function.
*/ void hid_destroy_device(struct hid_device *hdev) { hid_bpf_destroy_device(hdev); hid_remove_device(hdev); put_device(&hdev->dev); } EXPORT_SYMBOL_GPL(hid_destroy_device); static int __hid_bus_reprobe_drivers(struct device *dev, void *data) { struct hid_driver *hdrv = data; struct hid_device *hdev = to_hid_device(dev); if (hdev->driver == hdrv && !hdrv->match(hdev, hid_ignore_special_drivers) && !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status)) return device_reprobe(dev); return 0; } static int __hid_bus_driver_added(struct device_driver *drv, void *data) { struct hid_driver *hdrv = to_hid_driver(drv); if (hdrv->match) { bus_for_each_dev(&hid_bus_type, NULL, hdrv, __hid_bus_reprobe_drivers); } return 0; } static int __bus_removed_driver(struct device_driver *drv, void *data) { return bus_rescan_devices(&hid_bus_type); } int __hid_register_driver(struct hid_driver *hdrv, struct module *owner, const char *mod_name) { int ret; hdrv->driver.name = hdrv->name; hdrv->driver.bus = &hid_bus_type; hdrv->driver.owner = owner; hdrv->driver.mod_name = mod_name; INIT_LIST_HEAD(&hdrv->dyn_list); spin_lock_init(&hdrv->dyn_lock); ret = driver_register(&hdrv->driver); if (ret == 0) bus_for_each_drv(&hid_bus_type, NULL, NULL, __hid_bus_driver_added); return ret; } EXPORT_SYMBOL_GPL(__hid_register_driver); void hid_unregister_driver(struct hid_driver *hdrv) { driver_unregister(&hdrv->driver); hid_free_dynids(hdrv); bus_for_each_drv(&hid_bus_type, NULL, hdrv, __bus_removed_driver); } EXPORT_SYMBOL_GPL(hid_unregister_driver); int hid_check_keys_pressed(struct hid_device *hid) { struct hid_input *hidinput; int i; if (!(hid->claimed & HID_CLAIMED_INPUT)) return 0; list_for_each_entry(hidinput, &hid->inputs, list) { for (i = 0; i < BITS_TO_LONGS(KEY_MAX); i++) if (hidinput->input->key[i]) return 1; } return 0; } EXPORT_SYMBOL_GPL(hid_check_keys_pressed); #ifdef CONFIG_HID_BPF static const struct hid_ops __hid_ops = { .hid_get_report = hid_get_report, .hid_hw_raw_request = __hid_hw_raw_request, .hid_hw_output_report = __hid_hw_output_report, .hid_input_report = __hid_input_report, .owner = THIS_MODULE, .bus_type = &hid_bus_type, }; #endif static int __init hid_init(void) { int ret; ret = bus_register(&hid_bus_type); if (ret) { pr_err("can't register hid bus\n"); goto err; } #ifdef CONFIG_HID_BPF hid_ops = &__hid_ops; #endif ret = hidraw_init(); if (ret) goto err_bus; hid_debug_init(); return 0; err_bus: bus_unregister(&hid_bus_type); err: return ret; } static void __exit hid_exit(void) { #ifdef CONFIG_HID_BPF hid_ops = NULL; #endif hid_debug_exit(); hidraw_exit(); bus_unregister(&hid_bus_type); hid_quirks_exit(HID_BUS_ANY); } module_init(hid_init); module_exit(hid_exit); MODULE_AUTHOR("Andreas Gal"); MODULE_AUTHOR("Vojtech Pavlik"); MODULE_AUTHOR("Jiri Kosina"); MODULE_DESCRIPTION("HID support for Linux"); MODULE_LICENSE("GPL");
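
/*
 * For reference, the consumer side of the API above: a minimal HID driver
 * sketch that binds by VID/PID (both hypothetical) and relies on the
 * default parse/start sequence, mirroring what hid_device_probe() falls
 * back to when a driver provides no probe() of its own.
 */
#if 0	/* illustrative only, not built with hid-core */
static const struct hid_device_id sample_hid_devices[] = {
	{ HID_USB_DEVICE(0x1234, 0x5678) },	/* hypothetical VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(hid, sample_hid_devices);

static int sample_hid_probe(struct hid_device *hdev,
			    const struct hid_device_id *id)
{
	int ret;

	ret = hid_parse(hdev);		/* wraps hid_open_report() */
	if (ret)
		return ret;

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}

static struct hid_driver sample_hid_driver = {
	.name = "sample-hid",
	.id_table = sample_hid_devices,
	.probe = sample_hid_probe,
};
module_hid_driver(sample_hid_driver);
#endif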
// SPDX-License-Identifier: GPL-2.0
/*
 * Device physical location support
 *
 * Author: Won Chung <wonchung@google.com>
 */

#include <linux/acpi.h>
#include <linux/sysfs.h>
#include <linux/string_choices.h>

#include "physical_location.h"

bool dev_add_physical_location(struct device *dev)
{
	struct acpi_pld_info *pld;

	if (!has_acpi_companion(dev))
		return false;

	if (!acpi_get_physical_device_location(ACPI_HANDLE(dev), &pld))
		return false;

	dev->physical_location =
		kzalloc(sizeof(*dev->physical_location), GFP_KERNEL);
	if (!dev->physical_location) {
		ACPI_FREE(pld);
		return false;
	}

	dev->physical_location->panel = pld->panel;
	dev->physical_location->vertical_position = pld->vertical_position;
	dev->physical_location->horizontal_position = pld->horizontal_position;
	dev->physical_location->dock = pld->dock;
	dev->physical_location->lid = pld->lid;

	ACPI_FREE(pld);
	return true;
}

static ssize_t panel_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	const char *panel;

	switch (dev->physical_location->panel) {
	case DEVICE_PANEL_TOP:
		panel = "top";
		break;
	case DEVICE_PANEL_BOTTOM:
		panel = "bottom";
		break;
	case DEVICE_PANEL_LEFT:
		panel = "left";
		break;
	case DEVICE_PANEL_RIGHT:
		panel = "right";
		break;
	case DEVICE_PANEL_FRONT:
		panel = "front";
		break;
	case DEVICE_PANEL_BACK:
		panel = "back";
		break;
	default:
		panel = "unknown";
	}
	return sysfs_emit(buf, "%s\n", panel);
}
static DEVICE_ATTR_RO(panel);

static ssize_t vertical_position_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	const char *vertical_position;

	switch (dev->physical_location->vertical_position) {
	case DEVICE_VERT_POS_UPPER:
		vertical_position = "upper";
		break;
	case DEVICE_VERT_POS_CENTER:
		vertical_position = "center";
		break;
	case DEVICE_VERT_POS_LOWER:
		vertical_position = "lower";
		break;
	default:
		vertical_position = "unknown";
	}
	return sysfs_emit(buf, "%s\n", vertical_position);
}
static DEVICE_ATTR_RO(vertical_position);

static ssize_t horizontal_position_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	const char *horizontal_position;

	switch (dev->physical_location->horizontal_position) {
	case DEVICE_HORI_POS_LEFT:
		horizontal_position = "left";
		break;
	case DEVICE_HORI_POS_CENTER:
		horizontal_position = "center";
		break;
	case DEVICE_HORI_POS_RIGHT:
		horizontal_position = "right";
		break;
	default:
		horizontal_position = "unknown";
	}
	return sysfs_emit(buf, "%s\n", horizontal_position);
}
static DEVICE_ATTR_RO(horizontal_position);

static ssize_t dock_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  str_yes_no(dev->physical_location->dock));
}
static DEVICE_ATTR_RO(dock);

static ssize_t lid_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  str_yes_no(dev->physical_location->lid));
}
static DEVICE_ATTR_RO(lid);

static struct attribute *dev_attr_physical_location[] = {
	&dev_attr_panel.attr,
	&dev_attr_vertical_position.attr,
	&dev_attr_horizontal_position.attr,
	&dev_attr_dock.attr,
	&dev_attr_lid.attr,
	NULL,
};

const struct attribute_group dev_attr_physical_location_group = {
	.name = "physical_location",
	.attrs = dev_attr_physical_location,
};
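
/*
 * Usage sketch: a caller that wants the "physical_location" sysfs group
 * would populate the location first and then register the group. In-tree
 * this wiring lives in the driver core; the snippet below is a hedged
 * illustration, not the actual call site.
 */
#if 0
	if (dev_add_physical_location(dev)) {
		if (sysfs_create_group(&dev->kobj,
				       &dev_attr_physical_location_group))
			dev_warn(dev, "failed to add physical_location group\n");
	}
#endif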
// SPDX-License-Identifier: GPL-2.0
/*
 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
 */

#include <linux/sysctl.h>
#include <linux/seqlock.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/cipso_ipv4.h>
#include <net/ping.h>
#include <net/protocol.h>
#include <net/netevent.h>

static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
static int tcp_app_win_max = 31;
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
static int tcp_min_snd_mss_max = 65535;
static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int tcp_syn_linear_timeouts_max = MAX_TCP_SYNCNT;
static unsigned long ip_ping_group_range_min[] = { 0, 0 };
static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
static int tcp_plb_max_rounds = 31;
static int tcp_plb_max_cong_thresh = 256;
static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC;

/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;

/* Update system visible IP port range */
static void set_local_port_range(struct net *net, unsigned int low,
				 unsigned int high)
{
	bool same_parity = !((low ^ high) & 1);

	if (same_parity && !net->ipv4.ip_local_ports.warned) {
		net->ipv4.ip_local_ports.warned = true;
		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
	}
	WRITE_ONCE(net->ipv4.ip_local_ports.range, high << 16 | low);
}

/* Validate changes from /proc interface. */
static int ipv4_local_port_range(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = table->data;
	int ret;
	int range[2];
	struct ctl_table tmp = {
		.data = &range,
		.maxlen = sizeof(range),
		.mode = table->mode,
		.extra1 = &ip_local_port_range_min,
		.extra2 = &ip_local_port_range_max,
	};

	inet_get_local_port_range(net, &range[0], &range[1]);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		/* Ensure that the upper limit is not smaller than the lower,
		 * and that the lower does not encroach upon the privileged
		 * port limit.
		 */
		if ((range[1] < range[0]) ||
		    (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
			ret = -EINVAL;
		else
			set_local_port_range(net, range[0], range[1]);
	}

	return ret;
}

/* Validate changes from /proc interface. */
static int ipv4_privileged_ports(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_ip_prot_sock);
	int ret;
	int pports;
	int range[2];
	struct ctl_table tmp = {
		.data = &pports,
		.maxlen = sizeof(pports),
		.mode = table->mode,
		.extra1 = &ip_privileged_port_min,
		.extra2 = &ip_privileged_port_max,
	};

	pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		inet_get_local_port_range(net, &range[0], &range[1]);
		/* Ensure that the local port range doesn't overlap with the
		 * privileged port range.
		 */
		if (range[0] < pports)
			ret = -EINVAL;
		else
			WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
	}

	return ret;
}

static void inet_get_ping_group_range_table(const struct ctl_table *table,
					    kgid_t *low, kgid_t *high)
{
	kgid_t *data = table->data;
	struct net *net = container_of(table->data, struct net,
				       ipv4.ping_group_range.range);
	unsigned int seq;

	do {
		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);

		*low = data[0];
		*high = data[1];
	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}

/* Update the system visible ping group range */
static void set_ping_group_range(const struct ctl_table *table,
				 kgid_t low, kgid_t high)
{
	kgid_t *data = table->data;
	struct net *net = container_of(table->data, struct net,
				       ipv4.ping_group_range.range);

	write_seqlock(&net->ipv4.ping_group_range.lock);
	data[0] = low;
	data[1] = high;
	write_sequnlock(&net->ipv4.ping_group_range.lock);
}

/* Validate changes from /proc interface. */
static int ipv4_ping_group_range(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct user_namespace *user_ns = current_user_ns();
	int ret;
	unsigned long urange[2];
	kgid_t low, high;
	struct ctl_table tmp = {
		.data = &urange,
		.maxlen = sizeof(urange),
		.mode = table->mode,
		.extra1 = &ip_ping_group_range_min,
		.extra2 = &ip_ping_group_range_max,
	};

	inet_get_ping_group_range_table(table, &low, &high);
	urange[0] = from_kgid_munged(user_ns, low);
	urange[1] = from_kgid_munged(user_ns, high);
	ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		low = make_kgid(user_ns, urange[0]);
		high = make_kgid(user_ns, urange[1]);
		if (!gid_valid(low) || !gid_valid(high))
			return -EINVAL;
		if (urange[1] < urange[0] || gid_lt(high, low)) {
			low = make_kgid(&init_user_ns, 1);
			high = make_kgid(&init_user_ns, 0);
		}
		set_ping_group_range(table, low, high);
	}

	return ret;
}

static int ipv4_fwd_update_priority(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int ret;

	net = container_of(table->data, struct net,
			   ipv4.sysctl_ip_fwd_update_priority);
	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
	if (write && ret == 0)
		call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE,
					net);

	return ret;
}

static int proc_tcp_congestion_control(const struct ctl_table *ctl, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(ctl->data, struct net,
				       ipv4.tcp_congestion_control);
	char val[TCP_CA_NAME_MAX];
	struct ctl_table tbl = {
		.data = val,
		.maxlen = TCP_CA_NAME_MAX,
	};
	int ret;

	tcp_get_default_congestion_control(net, val);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = tcp_set_default_congestion_control(net, val);
	return ret;
}

static int proc_tcp_available_congestion_control(const struct ctl_table *ctl,
						 int write, void *buffer,
						 size_t *lenp, loff_t *ppos)
{
	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
	int ret;

	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
	if (!tbl.data)
		return -ENOMEM;
	tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	kfree(tbl.data);
	return ret;
}

static int proc_allowed_congestion_control(const struct ctl_table *ctl,
					   int write, void *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
	int ret;

	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
	if (!tbl.data)
		return -ENOMEM;

	tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = tcp_set_allowed_congestion_control(tbl.data);
	kfree(tbl.data);
	return ret;
}

static int sscanf_key(char *buf, __le32 *key)
{
	u32 user_key[4];
	int i, ret = 0;

	if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
		   user_key + 2, user_key + 3) != 4) {
		ret = -EINVAL;
	} else {
		for (i = 0; i < ARRAY_SIZE(user_key); i++)
			key[i] = cpu_to_le32(user_key[i]);
	}
	pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
		 user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);

	return ret;
}

static int proc_tcp_fastopen_key(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_tcp_fastopen);
	/* maxlen to print the list of keys in hex (*2), with dashes
	 * separating doublewords and a comma in between keys.
	 */
	struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
					     2 * TCP_FASTOPEN_KEY_MAX) +
					    (TCP_FASTOPEN_KEY_MAX * 5)) };
	u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)];
	__le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)];
	char *backup_data;
	int ret, i = 0, off = 0, n_keys;

	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
	if (!tbl.data)
		return -ENOMEM;

	n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key);
	if (!n_keys) {
		memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
		n_keys = 1;
	}

	for (i = 0; i < n_keys * 4; i++)
		user_key[i] = le32_to_cpu(key[i]);

	for (i = 0; i < n_keys; i++) {
		off += snprintf(tbl.data + off, tbl.maxlen - off,
				"%08x-%08x-%08x-%08x",
				user_key[i * 4],
				user_key[i * 4 + 1],
				user_key[i * 4 + 2],
				user_key[i * 4 + 3]);

		if (WARN_ON_ONCE(off >= tbl.maxlen - 1))
			break;

		if (i + 1 < n_keys)
			off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
	}

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		backup_data = strchr(tbl.data, ',');
		if (backup_data) {
			*backup_data = '\0';
			backup_data++;
		}
		if (sscanf_key(tbl.data, key)) {
			ret = -EINVAL;
			goto bad_key;
		}
		if (backup_data) {
			if (sscanf_key(backup_data, key + 4)) {
				ret = -EINVAL;
				goto bad_key;
			}
		}
		tcp_fastopen_reset_cipher(net, NULL, key,
					  backup_data ? key + 4 : NULL);
	}

bad_key:
	kfree(tbl.data);
	return ret;
}

static int proc_tfo_blackhole_detect_timeout(const struct ctl_table *table,
					     int write, void *buffer,
					     size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_tcp_fastopen_blackhole_timeout);
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (write && ret == 0)
		atomic_set(&net->ipv4.tfo_active_disable_times, 0);

	return ret;
}

static int proc_tcp_available_ulp(const struct ctl_table *ctl,
				  int write, void *buffer, size_t *lenp,
				  loff_t *ppos)
{
	struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
	int ret;

	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
	if (!tbl.data)
		return -ENOMEM;
	tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	kfree(tbl.data);

	return ret;
}

static int proc_tcp_ehash_entries(const struct ctl_table *table, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_tcp_child_ehash_entries);
	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
	int tcp_ehash_entries;
	struct ctl_table tbl;

	tcp_ehash_entries = hinfo->ehash_mask + 1;

	/* A negative number indicates that the child netns
	 * shares the global ehash.
	 */
	if (!net_eq(net, &init_net) && !hinfo->pernet)
		tcp_ehash_entries *= -1;

	memset(&tbl, 0, sizeof(tbl));
	tbl.data = &tcp_ehash_entries;
	tbl.maxlen = sizeof(int);

	return proc_dointvec(&tbl, write, buffer, lenp, ppos);
}

static int proc_udp_hash_entries(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_udp_child_hash_entries);
	int udp_hash_entries;
	struct ctl_table tbl;

	udp_hash_entries = net->ipv4.udp_table->mask + 1;

	/* A negative number indicates that the child netns
	 * shares the global udp_table.
	 */
	if (!net_eq(net, &init_net) && net->ipv4.udp_table == &udp_table)
		udp_hash_entries *= -1;

	memset(&tbl, 0, sizeof(tbl));
	tbl.data = &udp_hash_entries;
	tbl.maxlen = sizeof(int);

	return proc_dointvec(&tbl, write, buffer, lenp, ppos);
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
static int proc_fib_multipath_hash_policy(const struct ctl_table *table,
					  int write, void *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct net *net = container_of(table->data, struct net,
				       ipv4.sysctl_fib_multipath_hash_policy);
	int ret;

	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
	if (write && ret == 0)
		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);

	return ret;
}

static int proc_fib_multipath_hash_fields(const struct ctl_table *table,
					  int write, void *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int ret;

	net = container_of(table->data, struct net,
			   ipv4.sysctl_fib_multipath_hash_fields);
	ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
	if (write && ret == 0)
		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);

	return ret;
}

static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;

static void proc_fib_multipath_hash_init_rand_seed(void)
{
	get_random_bytes(&proc_fib_multipath_hash_rand_seed,
			 sizeof(proc_fib_multipath_hash_rand_seed));
}

static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
	struct sysctl_fib_multipath_hash_seed new = {
		.user_seed = user_seed,
		.mp_seed = (user_seed ? user_seed :
			    proc_fib_multipath_hash_rand_seed),
	};

	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
}

static int proc_fib_multipath_hash_seed(const struct ctl_table *table,
					int write, void *buffer,
					size_t *lenp, loff_t *ppos)
{
	struct sysctl_fib_multipath_hash_seed *mphs;
	struct net *net = table->data;
	struct ctl_table tmp;
	u32 user_seed;
	int ret;

	mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
	user_seed = mphs->user_seed;

	tmp = *table;
	tmp.data = &user_seed;

	ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0) {
		proc_fib_multipath_hash_set_seed(net, user_seed);
		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
	}

	return ret;
}
#else
static void proc_fib_multipath_hash_init_rand_seed(void)
{
}

static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
}
#endif

static struct ctl_table ipv4_table[] = {
	{ .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "inet_peer_threshold", .data = &inet_peer_threshold,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "inet_peer_minttl", .data = &inet_peer_minttl,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_jiffies },
	{ .procname = "inet_peer_maxttl", .data = &inet_peer_maxttl,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_jiffies },
	{ .procname = "tcp_mem", .maxlen = sizeof(sysctl_tcp_mem),
	  .data = &sysctl_tcp_mem, .mode = 0644,
	  .proc_handler = proc_doulongvec_minmax },
	{ .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
#ifdef CONFIG_NETLABEL
	{ .procname = "cipso_cache_enable", .data = &cipso_v4_cache_enabled,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "cipso_cache_bucket_size",
	  .data = &cipso_v4_cache_bucketsize,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "cipso_rbm_optfmt", .data = &cipso_v4_rbm_optfmt,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "cipso_rbm_strictvalid",
	  .data = &cipso_v4_rbm_strictvalid,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
#endif /* CONFIG_NETLABEL */
	{ .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX,
	  .mode = 0444, .proc_handler = proc_tcp_available_ulp },
	{ .procname = "udp_mem", .data = &sysctl_udp_mem,
	  .maxlen = sizeof(sysctl_udp_mem), .mode = 0644,
	  .proc_handler = proc_doulongvec_minmax },
	{ .procname = "fib_sync_mem", .data = &sysctl_fib_sync_mem,
	  .maxlen = sizeof(sysctl_fib_sync_mem), .mode = 0644,
	  .proc_handler = proc_douintvec_minmax,
	  .extra1 = &sysctl_fib_sync_mem_min,
	  .extra2 = &sysctl_fib_sync_mem_max },
};

static struct ctl_table ipv4_net_table[] = {
	{ .procname = "tcp_max_tw_buckets",
	  .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "icmp_echo_ignore_all",
	  .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "icmp_echo_enable_probe",
	  .data = &init_net.ipv4.sysctl_icmp_echo_enable_probe,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "icmp_echo_ignore_broadcasts",
	  .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "icmp_ignore_bogus_error_responses",
	  .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "icmp_errors_use_inbound_ifaddr",
	  .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "icmp_ratelimit",
	  .data = &init_net.ipv4.sysctl_icmp_ratelimit,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_ms_jiffies },
	{ .procname = "icmp_ratemask",
	  .data = &init_net.ipv4.sysctl_icmp_ratemask,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "icmp_msgs_per_sec",
	  .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO },
	{ .procname = "icmp_msgs_burst",
	  .data = &init_net.ipv4.sysctl_icmp_msgs_burst,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO },
	{ .procname = "ping_group_range",
	  .data = &init_net.ipv4.ping_group_range.range,
	  .maxlen = sizeof(gid_t) * 2, .mode = 0644,
	  .proc_handler = ipv4_ping_group_range },
#ifdef CONFIG_NET_L3_MASTER_DEV
	{ .procname = "raw_l3mdev_accept",
	  .data = &init_net.ipv4.sysctl_raw_l3mdev_accept,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
#endif
	{ .procname = "tcp_ecn", .data = &init_net.ipv4.sysctl_tcp_ecn,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO },
	{ .procname = "tcp_ecn_fallback",
	  .data = &init_net.ipv4.sysctl_tcp_ecn_fallback,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "ip_dynaddr", .data = &init_net.ipv4.sysctl_ip_dynaddr,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "ip_early_demux",
	  .data = &init_net.ipv4.sysctl_ip_early_demux,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "udp_early_demux",
	  .data = &init_net.ipv4.sysctl_udp_early_demux,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_early_demux",
	  .data = &init_net.ipv4.sysctl_tcp_early_demux,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "nexthop_compat_mode",
	  .data = &init_net.ipv4.sysctl_nexthop_compat_mode,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "ip_default_ttl",
	  .data = &init_net.ipv4.sysctl_ip_default_ttl,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max },
	{ .procname = "ip_local_port_range", .maxlen = 0, .data = &init_net,
	  .mode = 0644, .proc_handler = ipv4_local_port_range },
	{ .procname = "ip_local_reserved_ports",
	  .data = &init_net.ipv4.sysctl_local_reserved_ports,
	  .maxlen = 65536, .mode = 0644,
	  .proc_handler = proc_do_large_bitmap },
	{ .procname = "ip_no_pmtu_disc",
	  .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "ip_forward_use_pmtu",
	  .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "ip_forward_update_priority",
	  .data = &init_net.ipv4.sysctl_ip_fwd_update_priority,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = ipv4_fwd_update_priority,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "ip_nonlocal_bind",
	  .data = &init_net.ipv4.sysctl_ip_nonlocal_bind,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "ip_autobind_reuse",
	  .data = &init_net.ipv4.sysctl_ip_autobind_reuse,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "fwmark_reflect",
	  .data = &init_net.ipv4.sysctl_fwmark_reflect,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_fwmark_accept",
	  .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
#ifdef CONFIG_NET_L3_MASTER_DEV
	{ .procname = "tcp_l3mdev_accept",
	  .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
#endif
	{ .procname = "tcp_mtu_probing",
	  .data = &init_net.ipv4.sysctl_tcp_mtu_probing,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_base_mss",
	  .data = &init_net.ipv4.sysctl_tcp_base_mss,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_min_snd_mss",
	  .data = &init_net.ipv4.sysctl_tcp_min_snd_mss,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = &tcp_min_snd_mss_min, .extra2 = &tcp_min_snd_mss_max },
	{ .procname = "tcp_mtu_probe_floor",
	  .data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = &tcp_min_snd_mss_min, .extra2 = &tcp_min_snd_mss_max },
	{ .procname = "tcp_probe_threshold",
	  .data = &init_net.ipv4.sysctl_tcp_probe_threshold,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_probe_interval",
	  .data = &init_net.ipv4.sysctl_tcp_probe_interval,
	  .maxlen = sizeof(u32), .mode = 0644,
	  .proc_handler = proc_douintvec_minmax, .extra2 = &u32_max_div_HZ },
	{ .procname = "igmp_link_local_mcast_reports",
	  .data = &init_net.ipv4.sysctl_igmp_llm_reports,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "igmp_max_memberships",
	  .data = &init_net.ipv4.sysctl_igmp_max_memberships,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "igmp_max_msf",
	  .data = &init_net.ipv4.sysctl_igmp_max_msf,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
#ifdef CONFIG_IP_MULTICAST
	{ .procname = "igmp_qrv", .data = &init_net.ipv4.sysctl_igmp_qrv,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
#endif
	{ .procname = "tcp_congestion_control",
	  .data = &init_net.ipv4.tcp_congestion_control,
	  .maxlen = TCP_CA_NAME_MAX, .mode = 0644,
	  .proc_handler = proc_tcp_congestion_control },
	{ .procname = "tcp_available_congestion_control",
	  .maxlen = TCP_CA_BUF_MAX, .mode = 0444,
	  .proc_handler = proc_tcp_available_congestion_control },
	{ .procname = "tcp_allowed_congestion_control",
	  .maxlen = TCP_CA_BUF_MAX, .mode = 0644,
	  .proc_handler = proc_allowed_congestion_control },
	{ .procname = "tcp_keepalive_time",
	  .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_jiffies },
	{ .procname = "tcp_keepalive_probes",
	  .data = &init_net.ipv4.sysctl_tcp_keepalive_probes,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_keepalive_intvl",
	  .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_jiffies },
	{ .procname = "tcp_syn_retries",
	  .data = &init_net.ipv4.sysctl_tcp_syn_retries,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = &tcp_syn_retries_min, .extra2 = &tcp_syn_retries_max },
	{ .procname = "tcp_synack_retries",
	  .data = &init_net.ipv4.sysctl_tcp_synack_retries,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
#ifdef CONFIG_SYN_COOKIES
	{ .procname = "tcp_syncookies",
	  .data = &init_net.ipv4.sysctl_tcp_syncookies,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
#endif
	{ .procname = "tcp_migrate_req",
	  .data = &init_net.ipv4.sysctl_tcp_migrate_req,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_reordering",
	  .data = &init_net.ipv4.sysctl_tcp_reordering,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_retries1",
	  .data = &init_net.ipv4.sysctl_tcp_retries1,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax, .extra2 = &tcp_retr1_max },
	{ .procname = "tcp_retries2",
	  .data = &init_net.ipv4.sysctl_tcp_retries2,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_orphan_retries",
	  .data = &init_net.ipv4.sysctl_tcp_orphan_retries,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_fin_timeout",
	  .data = &init_net.ipv4.sysctl_tcp_fin_timeout,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_jiffies },
	{ .procname = "tcp_notsent_lowat",
	  .data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
	  .maxlen = sizeof(unsigned int), .mode = 0644,
	  .proc_handler = proc_douintvec },
	{ .procname = "tcp_tw_reuse",
	  .data = &init_net.ipv4.sysctl_tcp_tw_reuse,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO },
	{ .procname = "tcp_tw_reuse_delay",
	  .data = &init_net.ipv4.sysctl_tcp_tw_reuse_delay,
	  .maxlen = sizeof(unsigned int), .mode = 0644,
	  .proc_handler = proc_douintvec_minmax,
	  .extra1 = SYSCTL_ONE, .extra2 = &tcp_tw_reuse_delay_max },
	{ .procname = "tcp_max_syn_backlog",
	  .data = &init_net.ipv4.sysctl_max_syn_backlog,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_fastopen",
	  .data = &init_net.ipv4.sysctl_tcp_fastopen,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_fastopen_key", .mode = 0600,
	  .data = &init_net.ipv4.sysctl_tcp_fastopen,
	  /* maxlen to print the list of keys in hex (*2), with dashes
	   * separating doublewords and a comma in between keys.
	   */
	  .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2 * TCP_FASTOPEN_KEY_MAX) +
		     (TCP_FASTOPEN_KEY_MAX * 5)),
	  .proc_handler = proc_tcp_fastopen_key },
	{ .procname = "tcp_fastopen_blackhole_timeout_sec",
	  .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_tfo_blackhole_detect_timeout,
	  .extra1 = SYSCTL_ZERO },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	{ .procname = "fib_multipath_use_neigh",
	  .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "fib_multipath_hash_policy",
	  .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_fib_multipath_hash_policy,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_THREE },
	{ .procname = "fib_multipath_hash_fields",
	  .data = &init_net.ipv4.sysctl_fib_multipath_hash_fields,
	  .maxlen = sizeof(u32), .mode = 0644,
	  .proc_handler = proc_fib_multipath_hash_fields,
	  .extra1 = SYSCTL_ONE,
	  .extra2 = &fib_multipath_hash_fields_all_mask },
	{ .procname = "fib_multipath_hash_seed", .data = &init_net,
	  .maxlen = sizeof(u32), .mode = 0644,
	  .proc_handler = proc_fib_multipath_hash_seed },
#endif
	{ .procname = "ip_unprivileged_port_start", .maxlen = sizeof(int),
	  .data = &init_net.ipv4.sysctl_ip_prot_sock, .mode = 0644,
	  .proc_handler = ipv4_privileged_ports },
#ifdef CONFIG_NET_L3_MASTER_DEV
	{ .procname = "udp_l3mdev_accept",
	  .data = &init_net.ipv4.sysctl_udp_l3mdev_accept,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
#endif
	{ .procname = "tcp_sack", .data = &init_net.ipv4.sysctl_tcp_sack,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_window_scaling",
	  .data = &init_net.ipv4.sysctl_tcp_window_scaling,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_timestamps",
	  .data = &init_net.ipv4.sysctl_tcp_timestamps,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_early_retrans",
	  .data = &init_net.ipv4.sysctl_tcp_early_retrans,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_FOUR },
	{ .procname = "tcp_recovery",
	  .data = &init_net.ipv4.sysctl_tcp_recovery,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_thin_linear_timeouts",
	  .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_slow_start_after_idle",
	  .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_retrans_collapse",
	  .data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_stdurg", .data = &init_net.ipv4.sysctl_tcp_stdurg,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_rfc1337",
	  .data = &init_net.ipv4.sysctl_tcp_rfc1337,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_abort_on_overflow",
	  .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_fack", .data = &init_net.ipv4.sysctl_tcp_fack,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_max_reordering",
	  .data = &init_net.ipv4.sysctl_tcp_max_reordering,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_dsack", .data = &init_net.ipv4.sysctl_tcp_dsack,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_app_win",
	  .data = &init_net.ipv4.sysctl_tcp_app_win,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &tcp_app_win_max },
	{ .procname = "tcp_adv_win_scale",
	  .data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = &tcp_adv_win_scale_min, .extra2 = &tcp_adv_win_scale_max },
	{ .procname = "tcp_frto", .data = &init_net.ipv4.sysctl_tcp_frto,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_no_metrics_save",
	  .data = &init_net.ipv4.sysctl_tcp_nometrics_save,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_no_ssthresh_metrics_save",
	  .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_moderate_rcvbuf",
	  .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_tso_win_divisor",
	  .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_workaround_signed_windows",
	  .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_limit_output_bytes",
	  .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_challenge_ack_limit",
	  .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
	  .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec },
	{ .procname = "tcp_min_tso_segs",
	  .data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "tcp_tso_rtt_log",
	  .data = &init_net.ipv4.sysctl_tcp_tso_rtt_log,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_min_rtt_wlen",
	  .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &one_day_secs },
	{ .procname = "tcp_autocorking",
	  .data = &init_net.ipv4.sysctl_tcp_autocorking,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_invalid_ratelimit",
	  .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_ms_jiffies },
	{ .procname = "tcp_pacing_ss_ratio",
	  .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE_THOUSAND },
	{ .procname = "tcp_pacing_ca_ratio",
	  .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE_THOUSAND },
	{ .procname = "tcp_wmem", .data = &init_net.ipv4.sysctl_tcp_wmem,
	  .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "tcp_rmem", .data = &init_net.ipv4.sysctl_tcp_rmem,
	  .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "tcp_comp_sack_delay_ns",
	  .data = &init_net.ipv4.sysctl_tcp_comp_sack_delay_ns,
	  .maxlen = sizeof(unsigned long), .mode = 0644,
	  .proc_handler = proc_doulongvec_minmax },
	{ .procname = "tcp_comp_sack_slack_ns",
	  .data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
	  .maxlen = sizeof(unsigned long), .mode = 0644,
	  .proc_handler = proc_doulongvec_minmax },
	{ .procname = "tcp_comp_sack_nr",
	  .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO },
	{ .procname = "tcp_backlog_ack_defer",
	  .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_reflect_tos",
	  .data = &init_net.ipv4.sysctl_tcp_reflect_tos,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_ehash_entries",
	  .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries,
	  .mode = 0444, .proc_handler = proc_tcp_ehash_entries },
	{ .procname = "tcp_child_ehash_entries",
	  .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries,
	  .maxlen = sizeof(unsigned int), .mode = 0644,
	  .proc_handler = proc_douintvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &tcp_child_ehash_entries_max },
	{ .procname = "udp_hash_entries",
	  .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
	  .mode = 0444, .proc_handler = proc_udp_hash_entries },
	{ .procname = "udp_child_hash_entries",
	  .data = &init_net.ipv4.sysctl_udp_child_hash_entries,
	  .maxlen = sizeof(unsigned int), .mode = 0644,
	  .proc_handler = proc_douintvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &udp_child_hash_entries_max },
	{ .procname = "udp_rmem_min",
	  .data = &init_net.ipv4.sysctl_udp_rmem_min,
	  .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "udp_wmem_min",
	  .data = &init_net.ipv4.sysctl_udp_wmem_min,
	  .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "fib_notify_on_flag_change",
	  .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO },
	{ .procname = "tcp_plb_enabled",
	  .data = &init_net.ipv4.sysctl_tcp_plb_enabled,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_plb_idle_rehash_rounds",
	  .data = &init_net.ipv4.sysctl_tcp_plb_idle_rehash_rounds,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra2 = &tcp_plb_max_rounds },
	{ .procname = "tcp_plb_rehash_rounds",
	  .data = &init_net.ipv4.sysctl_tcp_plb_rehash_rounds,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra2 = &tcp_plb_max_rounds },
	{ .procname = "tcp_plb_suspend_rto_sec",
	  .data = &init_net.ipv4.sysctl_tcp_plb_suspend_rto_sec,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax },
	{ .procname = "tcp_plb_cong_thresh",
	  .data = &init_net.ipv4.sysctl_tcp_plb_cong_thresh,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &tcp_plb_max_cong_thresh },
	{ .procname = "tcp_syn_linear_timeouts",
	  .data = &init_net.ipv4.sysctl_tcp_syn_linear_timeouts,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = &tcp_syn_linear_timeouts_max },
	{ .procname = "tcp_shrink_window",
	  .data = &init_net.ipv4.sysctl_tcp_shrink_window,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
	{ .procname = "tcp_pingpong_thresh",
	  .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
	  .maxlen = sizeof(u8), .mode = 0644,
	  .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "tcp_rto_min_us",
	  .data = &init_net.ipv4.sysctl_tcp_rto_min_us,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE },
	{ .procname = "tcp_rto_max_ms",
	  .data = &init_net.ipv4.sysctl_tcp_rto_max_ms,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ONE_THOUSAND, .extra2 = &tcp_rto_max_max },
};

static __net_init int ipv4_sysctl_init_net(struct net *net)
{
	size_t table_size = ARRAY_SIZE(ipv4_net_table);
	struct ctl_table *table;

	table = ipv4_net_table;
	if (!net_eq(net, &init_net)) {
		int i;

		table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;

		for (i = 0; i < table_size; i++) {
			if (table[i].data) {
				/* Update the variables to point into
				 * the current struct net
				 */
				table[i].data += (void *)net - (void *)&init_net;
			} else {
				/* Entries without data pointer are global;
				 * Make them read-only in non-init_net ns
				 */
				table[i].mode &= ~0222;
			}
		}
	}

	net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
						    table_size);
	if (!net->ipv4.ipv4_hdr)
		goto err_reg;

	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!net->ipv4.sysctl_local_reserved_ports)
		goto err_ports;

	proc_fib_multipath_hash_set_seed(net, 0);

	return 0;

err_ports:
	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
	const struct ctl_table *table;

	kfree(net->ipv4.sysctl_local_reserved_ports);
	table = net->ipv4.ipv4_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
	kfree(table);
}

static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
	.init = ipv4_sysctl_init_net,
	.exit = ipv4_sysctl_exit_net,
};

static __init int sysctl_ipv4_init(void)
{
	struct ctl_table_header *hdr;

	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
	if (!hdr)
		return -ENOMEM;

	proc_fib_multipath_hash_init_rand_seed();

	if (register_pernet_subsys(&ipv4_sysctl_ops)) {
		unregister_net_sysctl_table(hdr);
		return -ENOMEM;
	}

	return 0;
}

__initcall(sysctl_ipv4_init);
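The registration pattern above (a ctl_table array whose entries name a handler such as proc_dointvec_minmax plus optional extra1/extra2 bounds, handed to register_net_sysctl) is reusable outside this file. Below is a minimal, hypothetical sketch of a module exposing one knob under net/demo; the "demo" directory, variable, and module are illustrative only and are not part of the file above.

#include <linux/module.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

static int demo_value;

/* One read-write integer at /proc/sys/net/demo/value, clamped to 0..1. */
static struct ctl_table demo_table[] = {
	{ .procname = "value", .data = &demo_value,
	  .maxlen = sizeof(int), .mode = 0644,
	  .proc_handler = proc_dointvec_minmax,
	  .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE },
};

static struct ctl_table_header *demo_hdr;

static int __init demo_init(void)
{
	demo_hdr = register_net_sysctl(&init_net, "net/demo", demo_table);
	return demo_hdr ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	unregister_net_sysctl_table(demo_hdr);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");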
// SPDX-License-Identifier: GPL-2.0
/*
 * Block stat tracking code
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/rculist.h>

#include "blk-stat.h"
#include "blk-mq.h"
#include "blk.h"

struct blk_queue_stats {
	struct list_head callbacks;
	spinlock_t lock;
	int accounting;
};

void blk_rq_stat_init(struct blk_rq_stat *stat)
{
	stat->min = -1ULL;
	stat->max = stat->nr_samples = stat->mean = 0;
	stat->batch = 0;
}

/* src is a per-cpu stat, mean isn't initialized */
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
	if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
		return;

	dst->min = min(dst->min, src->min);
	dst->max = max(dst->max, src->max);

	dst->mean = div_u64(src->batch + dst->mean * dst->nr_samples,
				dst->nr_samples + src->nr_samples);

	dst->nr_samples += src->nr_samples;
}

void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value)
{
	stat->min = min(stat->min, value);
	stat->max = max(stat->max, value);
	stat->batch += value;
	stat->nr_samples++;
}

void blk_stat_add(struct request *rq, u64 now)
{
	struct request_queue *q = rq->q;
	struct blk_stat_callback *cb;
	struct blk_rq_stat *stat;
	int bucket, cpu;
	u64 value;

	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;

	rcu_read_lock();
	cpu = get_cpu();
	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
		if (!blk_stat_is_active(cb))
			continue;

		bucket = cb->bucket_fn(rq);
		if (bucket < 0)
			continue;

		stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
		blk_rq_stat_add(stat, value);
	}
	put_cpu();
	rcu_read_unlock();
}

static void blk_stat_timer_fn(struct timer_list *t)
{
	struct blk_stat_callback *cb = timer_container_of(cb, t, timer);
	unsigned int bucket;
	int cpu;

	for (bucket = 0; bucket < cb->buckets; bucket++)
		blk_rq_stat_init(&cb->stat[bucket]);

	for_each_online_cpu(cpu) {
		struct blk_rq_stat *cpu_stat;

		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
		for (bucket = 0; bucket < cb->buckets; bucket++) {
			blk_rq_stat_sum(&cb->stat[bucket], &cpu_stat[bucket]);
			blk_rq_stat_init(&cpu_stat[bucket]);
		}
	}

	cb->timer_fn(cb);
}

struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
			int (*bucket_fn)(const struct request *),
			unsigned int buckets, void *data)
{
	struct blk_stat_callback *cb;

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb)
		return NULL;

	cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat),
				 GFP_KERNEL);
	if (!cb->stat) {
		kfree(cb);
		return NULL;
	}
	cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat),
				      __alignof__(struct blk_rq_stat));
	if (!cb->cpu_stat) {
		kfree(cb->stat);
		kfree(cb);
		return NULL;
	}

	cb->timer_fn = timer_fn;
	cb->bucket_fn = bucket_fn;
	cb->data = data;
	cb->buckets = buckets;
	timer_setup(&cb->timer, blk_stat_timer_fn, 0);

	return cb;
}

void blk_stat_add_callback(struct request_queue *q,
			   struct blk_stat_callback *cb)
{
	unsigned int bucket;
	unsigned long flags;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blk_rq_stat *cpu_stat;

		cpu_stat = per_cpu_ptr(cb->cpu_stat, cpu);
		for (bucket = 0; bucket < cb->buckets; bucket++)
			blk_rq_stat_init(&cpu_stat[bucket]);
	}

	spin_lock_irqsave(&q->stats->lock, flags);
	list_add_tail_rcu(&cb->list, &q->stats->callbacks);
	blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}

void blk_stat_remove_callback(struct request_queue *q,
			      struct blk_stat_callback *cb)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	list_del_rcu(&cb->list);
	if (list_empty(&q->stats->callbacks) && !q->stats->accounting)
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);

	timer_delete_sync(&cb->timer);
}

static void blk_stat_free_callback_rcu(struct rcu_head *head)
{
	struct blk_stat_callback *cb;

	cb = container_of(head, struct blk_stat_callback, rcu);
	free_percpu(cb->cpu_stat);
	kfree(cb->stat);
	kfree(cb);
}

void blk_stat_free_callback(struct blk_stat_callback *cb)
{
	if (cb)
		call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
}

void blk_stat_disable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	if (!--q->stats->accounting && list_empty(&q->stats->callbacks))
		blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_disable_accounting);

void blk_stat_enable_accounting(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(&q->stats->lock, flags);
	if (!q->stats->accounting++ && list_empty(&q->stats->callbacks))
		blk_queue_flag_set(QUEUE_FLAG_STATS, q);
	spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);

struct blk_queue_stats *blk_alloc_queue_stats(void)
{
	struct blk_queue_stats *stats;

	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return NULL;

	INIT_LIST_HEAD(&stats->callbacks);
	spin_lock_init(&stats->lock);
	stats->accounting = 0;

	return stats;
}

void blk_free_queue_stats(struct blk_queue_stats *stats)
{
	if (!stats)
		return;

	WARN_ON(!list_empty(&stats->callbacks));

	kfree(stats);
}
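A consumer of this API allocates a callback with a bucketing function and a timer function, attaches it to a queue, and arms the timer so blk_stat_add() starts sampling. Below is a minimal, hypothetical sketch under those assumptions; "demo_*" names are illustrative, the single-bucket scheme is deliberately trivial, the casts assume nothing about the exact field widths in struct blk_rq_stat, and the 100 ms window via blk_stat_activate_msecs() (declared in blk-stat.h) is an arbitrary choice.

#include <linux/blkdev.h>
#include "blk-stat.h"

/* Map every request to bucket 0; a real user might split by op or size. */
static int demo_bucket_fn(const struct request *rq)
{
	return 0;
}

/* Runs from the callback's timer after per-cpu buckets were summed. */
static void demo_timer_fn(struct blk_stat_callback *cb)
{
	struct blk_rq_stat *s = &cb->stat[0];

	pr_info("demo: %llu samples, mean latency %llu ns\n",
		(unsigned long long)s->nr_samples,
		(unsigned long long)s->mean);
}

/* Attach to @q and sample one window; assumes the caller owns @q. */
static int demo_watch_queue(struct request_queue *q)
{
	struct blk_stat_callback *cb;

	cb = blk_stat_alloc_callback(demo_timer_fn, demo_bucket_fn, 1, NULL);
	if (!cb)
		return -ENOMEM;
	blk_stat_add_callback(q, cb);
	/* blk_stat_add() only collects while the timer is pending. */
	blk_stat_activate_msecs(cb, 100);
	return 0;
}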
87 87 87 87 87 6 20 81 14 81 50 50 50 49 1 50 49 8 8 8 1 8 58 7 50 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 // SPDX-License-Identifier: GPL-2.0 /* * Functions related to generic helpers functions */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/scatterlist.h> #include "blk.h" static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector) { unsigned int discard_granularity = bdev_discard_granularity(bdev); sector_t granularity_aligned_sector; if (bdev_is_partition(bdev)) sector += bdev->bd_start_sect; granularity_aligned_sector = round_up(sector, discard_granularity >> SECTOR_SHIFT); /* * Make sure subsequent bios start aligned to the discard granularity if * it needs to be split. */ if (granularity_aligned_sector != sector) return granularity_aligned_sector - sector; /* * Align the bio size to the discard granularity to make splitting the bio * at discard granularity boundaries easier in the driver if needed. */ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT; } struct bio *blk_alloc_discard_bio(struct block_device *bdev, sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask) { sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector)); struct bio *bio; if (!bio_sects) return NULL; bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask); if (!bio) return NULL; bio->bi_iter.bi_sector = *sector; bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT; *sector += bio_sects; *nr_sects -= bio_sects; /* * We can loop for a long time in here if someone does full device * discards (like mkfs). Be nice and allow us to schedule out to avoid * softlocking if preempt is disabled. 
*/ cond_resched(); return bio; } int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop) { struct bio *bio; while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp_mask))) *biop = bio_chain_and_submit(*biop, bio); return 0; } EXPORT_SYMBOL(__blkdev_issue_discard); /** * blkdev_issue_discard - queue a discard * @bdev: blockdev to issue discard for * @sector: start sector * @nr_sects: number of sectors to discard * @gfp_mask: memory allocation flags (for bio_alloc) * * Description: * Issue a discard request for the sectors in question. */ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { struct bio *bio = NULL; struct blk_plug plug; int ret; blk_start_plug(&plug); ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); if (ret == -EOPNOTSUPP) ret = 0; bio_put(bio); } blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(blkdev_issue_discard); static sector_t bio_write_zeroes_limit(struct block_device *bdev) { sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; return min(bdev_write_zeroes_sectors(bdev), (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask); } /* * There is no reliable way for the SCSI subsystem to determine whether a * device supports a WRITE SAME operation without actually performing a write * to media. As a result, write_zeroes is enabled by default and will be * disabled if a zeroing operation subsequently fails. This means that this * queue limit is likely to change at runtime. */ static void __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags, sector_t limit) { while (nr_sects) { unsigned int len = min(nr_sects, limit); struct bio *bio; if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) break; bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask); bio->bi_iter.bi_sector = sector; if (flags & BLKDEV_ZERO_NOUNMAP) bio->bi_opf |= REQ_NOUNMAP; bio->bi_iter.bi_size = len << SECTOR_SHIFT; *biop = bio_chain_and_submit(*biop, bio); nr_sects -= len; sector += len; cond_resched(); } } static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { sector_t limit = bio_write_zeroes_limit(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; blk_start_plug(&plug); __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags, limit); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { bio_await_chain(bio); blk_finish_plug(&plug); return -EINTR; } ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); /* * For some devices there is no non-destructive way to verify whether * WRITE ZEROES is actually supported. These will clear the capability * on an I/O error, in which case we'll turn any error into * "not supported" here. */ if (ret && !bdev_write_zeroes_sectors(bdev)) return -EOPNOTSUPP; return ret; } /* * Convert a number of 512B sectors to a number of pages. * The result is limited to a number of pages that can fit into a BIO. * Also make sure that the result is always at least 1 (page) for the cases * where nr_sects is lower than the number of sectors in a page. 
*/ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) { sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512); return min(pages, (sector_t)BIO_MAX_VECS); } static void __blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned int flags) { while (nr_sects) { unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects); struct bio *bio; bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask); bio->bi_iter.bi_sector = sector; if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) break; do { unsigned int len, added; len = min_t(sector_t, PAGE_SIZE, nr_sects << SECTOR_SHIFT); added = bio_add_page(bio, ZERO_PAGE(0), len, 0); if (added < len) break; nr_sects -= added >> SECTOR_SHIFT; sector += added >> SECTOR_SHIFT; } while (nr_sects); *biop = bio_chain_and_submit(*biop, bio); cond_resched(); } } static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { struct bio *bio = NULL; struct blk_plug plug; int ret = 0; if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; blk_start_plug(&plug); __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { bio_await_chain(bio); blk_finish_plug(&plug); return -EINTR; } ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return ret; } /** * __blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) * @biop: pointer to anchor bio * @flags: controls detailed behavior * * Description: * Zero-fill a block range, either using hardware offload or by explicitly * writing zeroes to the device. * * If a device is using logical block provisioning, the underlying space will * not be released if %flags contains BLKDEV_ZERO_NOUNMAP. * * If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return * -EOPNOTSUPP if no explicit hardware offload for zeroing is provided. */ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) { sector_t limit = bio_write_zeroes_limit(bdev); if (bdev_read_only(bdev)) return -EPERM; if (limit) { __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, biop, flags, limit); } else { if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, biop, flags); } return 0; } EXPORT_SYMBOL(__blkdev_issue_zeroout); /** * blkdev_issue_zeroout - zero-fill a block range * @bdev: blockdev to write * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) * @flags: controls detailed behavior * * Description: * Zero-fill a block range, either using hardware offload or by explicitly * writing zeroes to the device. See __blkdev_issue_zeroout() for the * valid values for %flags. 
*/ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned flags) { int ret; if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1)) return -EINVAL; if (bdev_read_only(bdev)) return -EPERM; if (bdev_write_zeroes_sectors(bdev)) { ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask, flags); if (ret != -EOPNOTSUPP) return ret; } return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags); } EXPORT_SYMBOL(blkdev_issue_zeroout); int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp) { sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; /* make sure that "len << SECTOR_SHIFT" doesn't overflow */ if (max_sectors > UINT_MAX >> SECTOR_SHIFT) max_sectors = UINT_MAX >> SECTOR_SHIFT; max_sectors &= ~bs_mask; if (max_sectors == 0) return -EOPNOTSUPP; if ((sector | nr_sects) & bs_mask) return -EINVAL; if (bdev_read_only(bdev)) return -EPERM; blk_start_plug(&plug); while (nr_sects) { unsigned int len = min_t(sector_t, nr_sects, max_sectors); bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = len << SECTOR_SHIFT; sector += len; nr_sects -= len; cond_resched(); } if (bio) { ret = submit_bio_wait(bio); bio_put(bio); } blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(blkdev_issue_secure_erase);
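/*
 * Editor's illustration, not part of the original file: a minimal sketch of
 * how a caller might use blkdev_issue_zeroout() above. It first asks for a
 * pure hardware offload with BLKDEV_ZERO_NOFALLBACK and, only if the device
 * reports -EOPNOTSUPP, retries while permitting the slower zero-page
 * fallback. The function name and its parameters are hypothetical.
 */
static int example_zero_extent(struct block_device *bdev, sector_t sector,
			       sector_t nr_sects)
{
	int ret;

	/* Hardware offload only: fail fast instead of writing zero pages. */
	ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				   BLKDEV_ZERO_NOFALLBACK);
	if (ret == -EOPNOTSUPP)
		/* No offload available: accept the explicit writes. */
		ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
					   GFP_KERNEL, 0);
	return ret;
}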
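/*
 * Editor's illustration, not part of the original file: a sketch of the
 * *biop chaining pattern that __blkdev_issue_discard() is built around.
 * Several discontiguous ranges are queued under one plug and the whole
 * chain is waited on once, much as blkdev_issue_discard() does for a
 * single range. "struct example_range" and the function are hypothetical.
 */
struct example_range {
	sector_t sector;
	sector_t nr_sects;
};

static int example_discard_ranges(struct block_device *bdev,
				  const struct example_range *r, int nr)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int i, ret = 0;

	blk_start_plug(&plug);
	for (i = 0; i < nr && !ret; i++)
		ret = __blkdev_issue_discard(bdev, r[i].sector, r[i].nr_sects,
					     GFP_KERNEL, &bio);
	if (bio) {
		/* Wait for every bio chained onto the anchor to complete. */
		int err = submit_bio_wait(bio);

		if (err && !ret)
			ret = err;
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}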
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, OGAWA Hirofumi */ #include <linux/blkdev.h> #include <linux/sched/signal.h> #include <linux/backing-dev-defs.h> #include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); void (*ent_set_ptr)(struct fat_entry *, int); int (*ent_bread)(struct super_block *, struct fat_entry *, int, sector_t); int (*ent_get)(struct fat_entry *); void (*ent_put)(struct fat_entry *, int); int (*ent_next)(struct fat_entry *); }; static DEFINE_SPINLOCK(fat12_entry_lock); static void fat12_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = entry + (entry >> 1); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat_ent_blocknr(struct super_block *sb, int entry, int *offset, sector_t *blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); int bytes = (entry << sbi->fatent_shift); WARN_ON(!fat_valid_entry(sbi, entry)); *offset = bytes & (sb->s_blocksize - 1); *blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits); } static void fat12_ent_set_ptr(struct fat_entry *fatent, int offset) { struct buffer_head **bhs = fatent->bhs; if (fatent->nr_bhs == 1) { WARN_ON(offset >= (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[0]->b_data + (offset + 1); } else { WARN_ON(offset != (bhs[0]->b_size - 1)); fatent->u.ent12_p[0] = bhs[0]->b_data + offset; fatent->u.ent12_p[1] = bhs[1]->b_data; } } static void fat16_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (2 - 1)); fatent->u.ent16_p = (__le16 *)(fatent->bhs[0]->b_data + offset); } static void fat32_ent_set_ptr(struct fat_entry *fatent, int offset) { WARN_ON(offset & (4 - 1)); fatent->u.ent32_p = (__le32 *)(fatent->bhs[0]->b_data + offset); } static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct buffer_head **bhs = fatent->bhs; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; bhs[0] = sb_bread(sb, blocknr); if (!bhs[0]) goto err; if ((offset + 1) < sb->s_blocksize) fatent->nr_bhs = 1; else { /* This entry is on a block boundary; it needs the next block */ blocknr++; bhs[1] = sb_bread(sb, blocknr); if (!bhs[1]) goto err_brelse; fatent->nr_bhs = 2; } fat12_ent_set_ptr(fatent, offset); return 0; err_brelse: brelse(bhs[0]); err: fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; ops->ent_set_ptr(fatent, offset); return 0; } static int fat12_ent_get(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; int next; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) next = (*ent12_p[0] >> 4) | (*ent12_p[1] << 4); else next = (*ent12_p[1] << 8) |
*ent12_p[0]; spin_unlock(&fat12_entry_lock); next &= 0x0fff; if (next >= BAD_FAT12) next = FAT_ENT_EOF; return next; } static int fat16_ent_get(struct fat_entry *fatent) { int next = le16_to_cpu(*fatent->u.ent16_p); WARN_ON((unsigned long)fatent->u.ent16_p & (2 - 1)); if (next >= BAD_FAT16) next = FAT_ENT_EOF; return next; } static int fat32_ent_get(struct fat_entry *fatent) { int next = le32_to_cpu(*fatent->u.ent32_p) & 0x0fffffff; WARN_ON((unsigned long)fatent->u.ent32_p & (4 - 1)); if (next >= BAD_FAT32) next = FAT_ENT_EOF; return next; } static void fat12_ent_put(struct fat_entry *fatent, int new) { u8 **ent12_p = fatent->u.ent12_p; if (new == FAT_ENT_EOF) new = EOF_FAT12; spin_lock(&fat12_entry_lock); if (fatent->entry & 1) { *ent12_p[0] = (new << 4) | (*ent12_p[0] & 0x0f); *ent12_p[1] = new >> 4; } else { *ent12_p[0] = new & 0xff; *ent12_p[1] = (*ent12_p[1] & 0xf0) | (new >> 8); } spin_unlock(&fat12_entry_lock); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); if (fatent->nr_bhs == 2) mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); } static void fat16_ent_put(struct fat_entry *fatent, int new) { if (new == FAT_ENT_EOF) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static void fat32_ent_put(struct fat_entry *fatent, int new) { WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static int fat12_ent_next(struct fat_entry *fatent) { u8 **ent12_p = fatent->u.ent12_p; struct buffer_head **bhs = fatent->bhs; u8 *nextp = ent12_p[1] + 1 + (fatent->entry & 1); fatent->entry++; if (fatent->nr_bhs == 1) { WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { ent12_p[0] = nextp - 1; ent12_p[1] = nextp; return 1; } } else { WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); ent12_p[0] = nextp - 1; ent12_p[1] = nextp; brelse(bhs[0]); bhs[0] = bhs[1]; fatent->nr_bhs = 1; return 1; } ent12_p[0] = NULL; ent12_p[1] = NULL; return 0; } static int fat16_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent16_p < (__le16 *)(bh->b_data + (bh->b_size - 2))) { fatent->u.ent16_p++; return 1; } fatent->u.ent16_p = NULL; return 0; } static int fat32_ent_next(struct fat_entry *fatent) { const struct buffer_head *bh = fatent->bhs[0]; fatent->entry++; if (fatent->u.ent32_p < (__le32 *)(bh->b_data + (bh->b_size - 4))) { fatent->u.ent32_p++; return 1; } fatent->u.ent32_p = NULL; return 0; } static const struct fatent_operations fat12_ops = { .ent_blocknr = fat12_ent_blocknr, .ent_set_ptr = fat12_ent_set_ptr, .ent_bread = fat12_ent_bread, .ent_get = fat12_ent_get, .ent_put = fat12_ent_put, .ent_next = fat12_ent_next, }; static const struct fatent_operations fat16_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat16_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat16_ent_get, .ent_put = fat16_ent_put, .ent_next = fat16_ent_next, }; static const struct fatent_operations fat32_ops = { .ent_blocknr = fat_ent_blocknr, .ent_set_ptr = fat32_ent_set_ptr, .ent_bread = fat_ent_bread, .ent_get = fat32_ent_get, .ent_put = fat32_ent_put, .ent_next = fat32_ent_next, }; static inline void lock_fat(struct msdos_sb_info *sbi) { 
mutex_lock(&sbi->fat_lock); } static inline void unlock_fat(struct msdos_sb_info *sbi) { mutex_unlock(&sbi->fat_lock); } void fat_ent_access_init(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); mutex_init(&sbi->fat_lock); if (is_fat32(sbi)) { sbi->fatent_shift = 2; sbi->fatent_ops = &fat32_ops; } else if (is_fat16(sbi)) { sbi->fatent_shift = 1; sbi->fatent_ops = &fat16_ops; } else if (is_fat12(sbi)) { sbi->fatent_shift = -1; sbi->fatent_ops = &fat12_ops; } else { fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits); } } static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); if (sb_rdonly(sb) || !is_fat32(sbi)) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); } static inline int fat_ent_update_ptr(struct super_block *sb, struct fat_entry *fatent, int offset, sector_t blocknr) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct buffer_head **bhs = fatent->bhs; /* Do this fatent's blocks include this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; if (is_fat12(sbi)) { if ((offset + 1) < sb->s_blocksize) { /* This entry is on bhs[0]. */ if (fatent->nr_bhs == 2) { brelse(bhs[1]); fatent->nr_bhs = 1; } } else { /* This entry needs the next block. */ if (fatent->nr_bhs != 2) return 0; if (bhs[1]->b_blocknr != (blocknr + 1)) return 0; } } ops->ent_set_ptr(fatent, offset); return 1; } int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); const struct fatent_operations *ops = sbi->fatent_ops; int err, offset; sector_t blocknr; if (!fat_valid_entry(sbi, entry)) { fatent_brelse(fatent); fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry); return -EIO; } fatent_set_entry(fatent, entry); ops->ent_blocknr(sb, entry, &offset, &blocknr); if (!fat_ent_update_ptr(sb, fatent, offset, blocknr)) { fatent_brelse(fatent); err = ops->ent_bread(sb, fatent, offset, blocknr); if (err) return err; } return ops->ent_get(fatent); } /* FIXME: We could write the blocks as one bigger chunk.
*/ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, int nr_bhs) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *c_bh; int err, n, copy; err = 0; for (copy = 1; copy < sbi->fats; copy++) { sector_t backup_fat = sbi->fat_length * copy; for (n = 0; n < nr_bhs; n++) { c_bh = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr); if (!c_bh) { err = -ENOMEM; goto error; } /* Avoid race with userspace read via bdev */ lock_buffer(c_bh); memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); unlock_buffer(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); if (err) goto error; } } error: return err; } int fat_ent_write(struct inode *inode, struct fat_entry *fatent, int new, int wait) { struct super_block *sb = inode->i_sb; const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; int err; ops->ent_put(fatent, new); if (wait) { err = fat_sync_bhs(fatent->bhs, fatent->nr_bhs); if (err) return err; } return fat_mirror_bhs(sb, fatent->bhs, fatent->nr_bhs); } static inline int fat_ent_next(struct msdos_sb_info *sbi, struct fat_entry *fatent) { if (sbi->fatent_ops->ent_next(fatent)) { if (fatent->entry < sbi->max_cluster) return 1; } return 0; } static inline int fat_ent_read_block(struct super_block *sb, struct fat_entry *fatent) { const struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; sector_t blocknr; int offset; fatent_brelse(fatent); ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); return ops->ent_bread(sb, fatent, offset, blocknr); } static void fat_collect_bhs(struct buffer_head **bhs, int *nr_bhs, struct fat_entry *fatent) { int n, i; for (n = 0; n < fatent->nr_bhs; n++) { for (i = 0; i < *nr_bhs; i++) { if (fatent->bhs[n] == bhs[i]) break; } if (i == *nr_bhs) { get_bh(fatent->bhs[n]); bhs[i] = fatent->bhs[n]; (*nr_bhs)++; } } } int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent, prev_ent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, count, err, nr_bhs, idx_clus; BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */ lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid && sbi->free_clusters < nr_cluster) { unlock_fat(sbi); return -ENOSPC; } err = nr_bhs = idx_clus = 0; count = FAT_START_ENT; fatent_init(&prev_ent); fatent_init(&fatent); fatent_set_entry(&fatent, sbi->prev_free + 1); while (count < sbi->max_cluster) { if (fatent.entry >= sbi->max_cluster) fatent.entry = FAT_START_ENT; fatent_set_entry(&fatent, fatent.entry); err = fat_ent_read_block(sb, &fatent); if (err) goto out; /* Find the free entries in a block */ do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { int entry = fatent.entry; /* make the cluster chain */ ops->ent_put(&fatent, FAT_ENT_EOF); if (prev_ent.nr_bhs) ops->ent_put(&prev_ent, entry); fat_collect_bhs(bhs, &nr_bhs, &fatent); sbi->prev_free = entry; if (sbi->free_clusters != -1) sbi->free_clusters--; cluster[idx_clus] = entry; idx_clus++; if (idx_clus == nr_cluster) goto out; /* * fat_collect_bhs() gets ref-count of bhs, * so we can still use the prev_ent. 
*/ prev_ent = fatent; } count++; if (count == sbi->max_cluster) break; } while (fat_ent_next(sbi, &fatent)); } /* Couldn't allocate the free entries */ sbi->free_clusters = 0; sbi->free_clus_valid = 1; err = -ENOSPC; out: unlock_fat(sbi); mark_fsinfo_dirty(sb); fatent_brelse(&fatent); if (!err) { if (inode_needs_sync(inode)) err = fat_sync_bhs(bhs, nr_bhs); if (!err) err = fat_mirror_bhs(sb, bhs, nr_bhs); } for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); if (err && idx_clus) fat_free_clusters(inode, cluster[0]); return err; } int fat_free_clusters(struct inode *inode, int cluster) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, err, nr_bhs; int first_cl = cluster, dirty_fsinfo = 0; nr_bhs = 0; fatent_init(&fatent); lock_fat(sbi); do { cluster = fat_ent_read(inode, &fatent, cluster); if (cluster < 0) { err = cluster; goto error; } else if (cluster == FAT_ENT_FREE) { fat_fs_error(sb, "%s: deleting FAT entry beyond EOF", __func__); err = -EIO; goto error; } if (sbi->options.discard) { /* * Issue discard for the sectors we no longer * care about, batching contiguous clusters * into one request */ if (cluster != fatent.entry + 1) { int nr_clus = fatent.entry - first_cl + 1; sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); first_cl = cluster; } } ops->ent_put(&fatent, FAT_ENT_FREE); if (sbi->free_clusters != -1) { sbi->free_clusters++; dirty_fsinfo = 1; } if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) { if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); if (err) goto error; for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); nr_bhs = 0; } fat_collect_bhs(bhs, &nr_bhs, &fatent); } while (cluster != FAT_ENT_EOF); if (sb->s_flags & SB_SYNCHRONOUS) { err = fat_sync_bhs(bhs, nr_bhs); if (err) goto error; } err = fat_mirror_bhs(sb, bhs, nr_bhs); error: fatent_brelse(&fatent); for (i = 0; i < nr_bhs; i++) brelse(bhs[i]); unlock_fat(sbi); if (dirty_fsinfo) mark_fsinfo_dirty(sb); return err; } EXPORT_SYMBOL_GPL(fat_free_clusters); struct fatent_ra { sector_t cur; sector_t limit; unsigned int ra_blocks; sector_t ra_advance; sector_t ra_next; sector_t ra_limit; }; static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent, int ent_limit) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; sector_t blocknr, block_end; int offset; /* * This is the sequential read, so ra_pages * 2 (but try to * align the optimal hardware IO size). * [BTW, 128kb covers the whole sectors for FAT12 and FAT16] */ unsigned long ra_pages = sb->s_bdi->ra_pages; unsigned int reada_blocks; if (fatent->entry >= ent_limit) return; if (ra_pages > sb->s_bdi->io_pages) ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages); reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1); /* Initialize the range for sequential read */ ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); ops->ent_blocknr(sb, ent_limit - 1, &offset, &block_end); ra->cur = 0; ra->limit = (block_end + 1) - blocknr; /* Advancing the window at half size */ ra->ra_blocks = reada_blocks >> 1; ra->ra_advance = ra->cur; ra->ra_next = ra->cur; ra->ra_limit = ra->cur + min_t(sector_t, reada_blocks, ra->limit); } /* Assuming to be called before reading a new block (increments ->cur). 
*/ static void fat_ent_reada(struct super_block *sb, struct fatent_ra *ra, struct fat_entry *fatent) { if (ra->ra_next >= ra->ra_limit) return; if (ra->cur >= ra->ra_advance) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct blk_plug plug; sector_t blocknr, diff; int offset; ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr); diff = blocknr - ra->cur; blk_start_plug(&plug); /* * FIXME: we would want to directly use the bio with * pages to reduce the number of segments. */ for (; ra->ra_next < ra->ra_limit; ra->ra_next++) sb_breadahead(sb, ra->ra_next + diff); blk_finish_plug(&plug); /* Advance the readahead window */ ra->ra_advance += ra->ra_blocks; ra->ra_limit += min_t(sector_t, ra->ra_blocks, ra->limit - ra->ra_limit); } ra->cur++; } int fat_count_free_clusters(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; int err = 0, free; lock_fat(sbi); if (sbi->free_clusters != -1 && sbi->free_clus_valid) goto out; free = 0; fatent_init(&fatent); fatent_set_entry(&fatent, FAT_START_ENT); fat_ra_init(sb, &fatent_ra, &fatent, sbi->max_cluster); while (fatent.entry < sbi->max_cluster) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto out; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; mark_fsinfo_dirty(sb); fatent_brelse(&fatent); out: unlock_fat(sbi); return err; } static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus) { struct msdos_sb_info *sbi = MSDOS_SB(sb); return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus), nr_clus * sbi->sec_per_clus, GFP_NOFS, 0); } int fat_trim_fs(struct inode *inode, struct fstrim_range *range) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); const struct fatent_operations *ops = sbi->fatent_ops; struct fat_entry fatent; struct fatent_ra fatent_ra; u64 ent_start, ent_end, minlen, trimmed = 0; u32 free = 0; int err = 0; /* * FAT data is organized as clusters, so trim at the granularity of a * cluster. * * fstrim_range is in bytes; convert the values to cluster indexes. * Treat sectors before data region as all used, not to trim them.
*/ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT); ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1; minlen = range->minlen >> sbi->cluster_bits; if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size) return -EINVAL; if (ent_end >= sbi->max_cluster) ent_end = sbi->max_cluster - 1; fatent_init(&fatent); lock_fat(sbi); fatent_set_entry(&fatent, ent_start); fat_ra_init(sb, &fatent_ra, &fatent, ent_end + 1); while (fatent.entry <= ent_end) { /* readahead of fat blocks */ fat_ent_reada(sb, &fatent_ra, &fatent); err = fat_ent_read_block(sb, &fatent); if (err) goto error; do { if (ops->ent_get(&fatent) == FAT_ENT_FREE) { free++; } else if (free) { if (free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } free = 0; } } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto error; } if (need_resched()) { fatent_brelse(&fatent); unlock_fat(sbi); cond_resched(); lock_fat(sbi); } } /* handle scenario when tail entries are all free */ if (free && free >= minlen) { u32 clus = fatent.entry - free; err = fat_trim_clusters(sb, clus, free); if (err && err != -EOPNOTSUPP) goto error; if (!err) trimmed += free; err = 0; } error: fatent_brelse(&fatent); unlock_fat(sbi); range->len = trimmed << sbi->cluster_bits; return err; }
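/*
 * Editor's worked example, not part of the original file: the 12-bit
 * packing that fat12_ent_blocknr(), fat12_ent_get() and fat12_ent_put()
 * above implement. Entry N starts at byte offset N + N/2; an even entry
 * is byte b0 plus the low nibble of b1, an odd entry is the high nibble
 * of b0 plus all of b1. The helper below is a hypothetical, lock-free
 * restatement for a FAT image that is fully in memory.
 */
static unsigned int example_fat12_unpack(const u8 *fat, int entry)
{
	const u8 *p = fat + entry + (entry >> 1); /* as fat12_ent_blocknr() */

	if (entry & 1)
		return ((p[0] >> 4) | (p[1] << 4)) & 0x0fff;
	return ((p[1] << 8) | p[0]) & 0x0fff;
}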
/* SPDX-License-Identifier: GPL-2.0 */ /* * Device core Trace Support * Copyright (C) 2021, Intel Corporation * * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> */ #undef TRACE_SYSTEM #define TRACE_SYSTEM dev #if !defined(__DEV_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define __DEV_TRACE_H #include <linux/device.h> #include <linux/tracepoint.h> #include <linux/types.h> DECLARE_EVENT_CLASS(devres, TP_PROTO(struct device *dev, const char *op, void *node, const char *name, size_t size), TP_ARGS(dev, op, node, name, size), TP_STRUCT__entry( __string(devname, dev_name(dev)) __field(struct device *, dev) __field(const char *, op) __field(void *, node) __string(name, name) __field(size_t, size) ), TP_fast_assign( __assign_str(devname); __entry->op = op; __entry->node = node; __assign_str(name); __entry->size = size; ), TP_printk("%s %3s %p %s (%zu bytes)", __get_str(devname), __entry->op, __entry->node, __get_str(name), __entry->size) ); DEFINE_EVENT(devres, devres_log, TP_PROTO(struct device *dev, const char *op, void *node, const char *name, size_t size), TP_ARGS(dev, op, node, name, size) ); #endif /* __DEV_TRACE_H */ /* this part has to be here */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
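/*
 * Editor's usage sketch, not part of the original file: DEFINE_EVENT()
 * above generates a trace_devres_log() tracepoint, which the devres code
 * in drivers/base can fire roughly as below. The wrapper, the "ADD" op
 * string and the argument values shown are illustrative assumptions.
 */
static inline void example_trace_devres_add(struct device *dev, void *node,
					    const char *name, size_t size)
{
	/* A static-key no-op unless the dev:devres_log event is enabled. */
	trace_devres_log(dev, "ADD", node, name, size);
}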
// SPDX-License-Identifier: GPL-2.0-or-later /* * Sound core. This file is composed of two parts: sound_class, * which is common to both OSS and ALSA, and the OSS sound core, * which is used by OSS or emulations of it. */ /* * First, the common part.
*/ #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> #include <linux/kdev_t.h> #include <linux/major.h> #include <sound/core.h> #ifdef CONFIG_SOUND_OSS_CORE static int __init init_oss_soundcore(void); static void cleanup_oss_soundcore(void); #else static inline int init_oss_soundcore(void) { return 0; } static inline void cleanup_oss_soundcore(void) { } #endif MODULE_DESCRIPTION("Core sound module"); MODULE_AUTHOR("Alan Cox"); MODULE_LICENSE("GPL"); static char *sound_devnode(const struct device *dev, umode_t *mode) { if (MAJOR(dev->devt) == SOUND_MAJOR) return NULL; return kasprintf(GFP_KERNEL, "snd/%s", dev_name(dev)); } const struct class sound_class = { .name = "sound", .devnode = sound_devnode, }; EXPORT_SYMBOL(sound_class); static int __init init_soundcore(void) { int rc; rc = init_oss_soundcore(); if (rc) return rc; rc = class_register(&sound_class); if (rc) { cleanup_oss_soundcore(); return rc; } return 0; } static void __exit cleanup_soundcore(void) { cleanup_oss_soundcore(); class_unregister(&sound_class); } subsys_initcall(init_soundcore); module_exit(cleanup_soundcore); #ifdef CONFIG_SOUND_OSS_CORE /* * OSS sound core handling. Breaks out sound functions to submodules * * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * * -------------------- * * Top level handler for the sound subsystem. Various devices can * plug into this. The fact they don't all go via OSS doesn't mean * they don't have to implement the OSS API. There is a lot of logic * to keeping much of the OSS weight out of the code in a compatibility * module, but it's up to the driver to remember to load it... * * The code provides a set of functions for registration of devices * by type. This is done rather than providing a single call so that * we can hide any future changes in the internals (eg when we go to * 32bit dev_t) from the modules and their interface. * * Secondly we need to allocate the dsp, dsp16 and audio devices as * one. Thus we misuse the chains a bit to simplify this. * * Thirdly to make it more fun and for 2.3.x and above we do all * of this using fine grained locking. * * FIXME: we have to resolve modules and fine grained load/unload * locking at some point in 2.3.x. */ #include <linux/init.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sound.h> #include <linux/kmod.h> #define SOUND_STEP 16 struct sound_unit { int unit_minor; const struct file_operations *unit_fops; struct sound_unit *next; char name[32]; }; /* * By default, OSS sound_core claims the full legacy minor range (0-255) * of SOUND_MAJOR to trap open attempts to any sound minor and * requests modules using custom sound-slot/service-* module aliases. * The only benefit of doing this is allowing use of custom module * aliases instead of the standard char-major-* ones. This behavior * prevents alternative OSS implementations and is scheduled to be * removed. * * The CONFIG_SOUND_OSS_CORE_PRECLAIM and soundcore.preclaim_oss kernel * parameters were added to allow distros and developers to try and * switch to alternative implementations without needing to rebuild * the kernel in the meantime. If preclaim_oss is non-zero, the * kernel will behave the same as before. All SOUND_MAJOR minors are * preclaimed and the custom module aliases along with standard chrdev * ones are emitted if a missing device is opened. If preclaim_oss is * zero, sound_core only grabs what's actually in use and for missing * devices only the standard chrdev aliases are requested.
* * All these clutters are scheduled to be removed along with * sound-slot/service-* module aliases. */ static int preclaim_oss = IS_ENABLED(CONFIG_SOUND_OSS_CORE_PRECLAIM); module_param(preclaim_oss, int, 0444); static int soundcore_open(struct inode *, struct file *); static const struct file_operations soundcore_fops = { /* We must have an owner or the module locking fails */ .owner = THIS_MODULE, .open = soundcore_open, .llseek = noop_llseek, }; /* * Low level list operator. Scan the ordered list, find a hole and * join into it. Called with the lock asserted */ static int __sound_insert_unit(struct sound_unit * s, struct sound_unit **list, const struct file_operations *fops, int index, int low, int top) { int n=low; if (index < 0) { /* first free */ while (*list && (*list)->unit_minor<n) list=&((*list)->next); while(n<top) { /* Found a hole ? */ if(*list==NULL || (*list)->unit_minor>n) break; list=&((*list)->next); n+=SOUND_STEP; } if(n>=top) return -ENOENT; } else { n = low+(index*16); while (*list) { if ((*list)->unit_minor==n) return -EBUSY; if ((*list)->unit_minor>n) break; list=&((*list)->next); } } /* * Fill it in */ s->unit_minor=n; s->unit_fops=fops; /* * Link it */ s->next=*list; *list=s; return n; } /* * Remove a node from the chain. Called with the lock asserted */ static struct sound_unit *__sound_remove_unit(struct sound_unit **list, int unit) { while(*list) { struct sound_unit *p=*list; if(p->unit_minor==unit) { *list=p->next; return p; } list=&(p->next); } printk(KERN_ERR "Sound device %d went missing!\n", unit); return NULL; } /* * This lock guards the sound loader list. */ static DEFINE_SPINLOCK(sound_loader_lock); /* * Allocate the controlling structure and add it to the sound driver * list. Acquires locks as needed */ static int sound_insert_unit(struct sound_unit **list, const struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; if (!s) return -ENOMEM; spin_lock(&sound_loader_lock); retry: r = __sound_insert_unit(s, list, fops, index, low, top); spin_unlock(&sound_loader_lock); if (r < 0) goto fail; else if (r < SOUND_STEP) sprintf(s->name, "sound/%s", name); else sprintf(s->name, "sound/%s%d", name, r / SOUND_STEP); if (!preclaim_oss) { /* * Something else might have grabbed the minor. If * first free slot is requested, rescan with @low set * to the next unit; otherwise, -EBUSY. */ r = __register_chrdev(SOUND_MAJOR, s->unit_minor, 1, s->name, &soundcore_fops); if (r < 0) { spin_lock(&sound_loader_lock); __sound_remove_unit(list, s->unit_minor); if (index < 0) { low = s->unit_minor + SOUND_STEP; goto retry; } spin_unlock(&sound_loader_lock); r = -EBUSY; goto fail; } } device_create(&sound_class, dev, MKDEV(SOUND_MAJOR, s->unit_minor), NULL, "%s", s->name+6); return s->unit_minor; fail: kfree(s); return r; } /* * Remove a unit. Acquires locks as needed. The drivers MUST have * completed the removal before their file operations become * invalid. 
*/ static void sound_remove_unit(struct sound_unit **list, int unit) { struct sound_unit *p; spin_lock(&sound_loader_lock); p = __sound_remove_unit(list, unit); spin_unlock(&sound_loader_lock); if (p) { if (!preclaim_oss) __unregister_chrdev(SOUND_MAJOR, p->unit_minor, 1, p->name); device_destroy(&sound_class, MKDEV(SOUND_MAJOR, p->unit_minor)); kfree(p); } } /* * Allocations * * 0 *16 Mixers * 1 *8 Sequencers * 2 *16 Midi * 3 *16 DSP * 4 *16 SunDSP * 5 *16 DSP16 * 6 -- sndstat (obsolete) * 7 *16 unused * 8 -- alternate sequencer (see above) * 9 *16 raw synthesizer access * 10 *16 unused * 11 *16 unused * 12 *16 unused * 13 *16 unused * 14 *16 unused * 15 *16 unused */ static struct sound_unit *chains[SOUND_STEP]; /** * register_sound_special_device - register a special sound node * @fops: File operations for the driver * @unit: Unit number to allocate * @dev: device pointer * * Allocate a special sound device by minor number from the sound * subsystem. * * Return: The allocated number is returned on success. On failure, * a negative error code is returned. */ int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev) { const int chain = unit % SOUND_STEP; int max_unit = 256; const char *name; char _name[16]; switch (chain) { case 0: name = "mixer"; break; case 1: name = "sequencer"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 2: name = "midi"; break; case 3: name = "dsp"; break; case 4: name = "audio"; break; case 5: name = "dspW"; break; case 8: name = "sequencer2"; if (unit >= SOUND_STEP) goto __unknown; max_unit = unit + 1; break; case 9: name = "dmmidi"; break; case 10: name = "dmfm"; break; case 12: name = "adsp"; break; case 13: name = "amidi"; break; case 14: name = "admmidi"; break; default: { __unknown: sprintf(_name, "unknown%d", chain); if (unit >= SOUND_STEP) strcat(_name, "-"); name = _name; } break; } return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit, name, 0600, dev); } EXPORT_SYMBOL(register_sound_special_device); int register_sound_special(const struct file_operations *fops, int unit) { return register_sound_special_device(fops, unit, NULL); } EXPORT_SYMBOL(register_sound_special); /** * register_sound_mixer - register a mixer device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a mixer device. Unit is the number of the mixer requested. * Pass -1 to request the next free mixer unit. * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. */ int register_sound_mixer(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, "mixer", 0600, NULL); } EXPORT_SYMBOL(register_sound_mixer); /* * DSP's are registered as a triple. Register only one and cheat * in open - see below. */ /** * register_sound_dsp - register a DSP device * @fops: File operations for the driver * @dev: Unit number to allocate * * Allocate a DSP device. Unit is the number of the DSP requested. * Pass -1 to request the next free DSP unit. * * This function allocates both the audio and dsp device entries together * and will always allocate them as a matching pair - eg dsp3/audio3 * * Return: On success, the allocated number is returned. On failure, * a negative error code is returned. 
*/ int register_sound_dsp(const struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, "dsp", 0600, NULL); } EXPORT_SYMBOL(register_sound_dsp); /** * unregister_sound_special - unregister a special sound device * @unit: unit number to allocate * * Release a sound device that was allocated with * register_sound_special(). The unit passed is the return value from * the register function. */ void unregister_sound_special(int unit) { sound_remove_unit(&chains[unit % SOUND_STEP], unit); } EXPORT_SYMBOL(unregister_sound_special); /** * unregister_sound_mixer - unregister a mixer * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_mixer(). * The unit passed is the return value from the register function. */ void unregister_sound_mixer(int unit) { sound_remove_unit(&chains[0], unit); } EXPORT_SYMBOL(unregister_sound_mixer); /** * unregister_sound_dsp - unregister a DSP device * @unit: unit number to allocate * * Release a sound device that was allocated with register_sound_dsp(). * The unit passed is the return value from the register function. * * Both of the allocated units are released together automatically. */ void unregister_sound_dsp(int unit) { sound_remove_unit(&chains[3], unit); } EXPORT_SYMBOL(unregister_sound_dsp); static struct sound_unit *__look_for_unit(int chain, int unit) { struct sound_unit *s; s=chains[chain]; while(s && s->unit_minor <= unit) { if(s->unit_minor==unit) return s; s=s->next; } return NULL; } static int soundcore_open(struct inode *inode, struct file *file) { int chain; int unit = iminor(inode); struct sound_unit *s; const struct file_operations *new_fops = NULL; chain=unit&0x0F; if(chain==4 || chain==5) /* dsp/audio/dsp16 */ { unit&=0xF0; unit|=3; chain=3; } spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); if (preclaim_oss && !new_fops) { spin_unlock(&sound_loader_lock); /* * Please, don't change this order or code. * For ALSA slot means soundcard and OSS emulation code * comes as add-on modules which aren't depend on * ALSA toplevel modules for soundcards, thus we need * load them at first. [Jaroslav Kysela <perex@jcu.cz>] */ request_module("sound-slot-%i", unit>>4); request_module("sound-service-%i-%i", unit>>4, chain); /* * sound-slot/service-* module aliases are scheduled * for removal in favor of the standard char-major-* * module aliases. For the time being, generate both * the legacy and standard module aliases to ease * transition. */ if (request_module("char-major-%d-%d", SOUND_MAJOR, unit) > 0) request_module("char-major-%d", SOUND_MAJOR); spin_lock(&sound_loader_lock); s = __look_for_unit(chain, unit); if (s) new_fops = fops_get(s->unit_fops); } spin_unlock(&sound_loader_lock); if (!new_fops) return -ENODEV; /* * We rely upon the fact that we can't be unloaded while the * subdriver is there. */ replace_fops(file, new_fops); if (!file->f_op->open) return -ENODEV; return file->f_op->open(inode, file); } MODULE_ALIAS_CHARDEV_MAJOR(SOUND_MAJOR); static void cleanup_oss_soundcore(void) { /* We have nothing to really do here - we know the lists must be empty */ unregister_chrdev(SOUND_MAJOR, "sound"); } static int __init init_oss_soundcore(void) { if (preclaim_oss && register_chrdev(SOUND_MAJOR, "sound", &soundcore_fops) < 0) { printk(KERN_ERR "soundcore: sound device already in use.\n"); return -EBUSY; } return 0; } #endif /* CONFIG_SOUND_OSS_CORE */
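/*
 * Editor's usage sketch, not part of the original file: a minimal OSS-style
 * driver registering a DSP device against the helpers above. Passing -1
 * requests the first free unit; the returned minor is what must later be
 * handed back to unregister_sound_dsp(). "example_dsp_fops" is a
 * hypothetical file_operations filled in by the real driver.
 */
static const struct file_operations example_dsp_fops = {
	.owner = THIS_MODULE,
	/* .open/.read/.write etc. supplied by the real driver */
};

static int example_dsp_minor;

static int __init example_oss_init(void)
{
	example_dsp_minor = register_sound_dsp(&example_dsp_fops, -1);
	return example_dsp_minor < 0 ? example_dsp_minor : 0;
}

static void __exit example_oss_exit(void)
{
	/* Releases the paired dsp/audio minors together. */
	unregister_sound_dsp(example_dsp_minor);
}

module_init(example_oss_init);
module_exit(example_oss_exit);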
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include <linux/mmzone.h> #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> struct resource; struct device; /** * struct vmem_altmap - pre-allocated storage for vmemmap_populate * @base_pfn: base of the entire dev_pagemap mapping * @reserve: pages mapped, but reserved for driver use (relative to @base) * @free: free pages set aside in the mapping for memmap storage * @align: pages reserved to meet allocation alignments * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; unsigned long alloc; bool inaccessible; }; /* * Specialize ZONE_DEVICE memory into multiple types, each of which has a * different usage. * * MEMORY_DEVICE_PRIVATE: * Device memory that is not directly addressable by the CPU: CPU can neither * read nor write private memory. In this case, we do still have struct pages * backing the device memory. Doing so simplifies the implementation, but it is * important to remember that there are certain points at which the struct page * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/mm/hmm.rst. * * MEMORY_DEVICE_COHERENT: * Device memory that is cache coherent from device and CPU point of view. This * is used on platforms that have an advanced system bus (like CAPI or CXL). A * driver can hotplug the device memory using ZONE_DEVICE and with that memory * type. Any page of a process can be migrated to such memory. However no one * should be allowed to pin such memory so that it can always be evicted. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a * wakeup event whenever a page is unpinned and becomes idle. This * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. This is for example used by DAX devices * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions.
*/ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_COHERENT, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; struct dev_pagemap_ops { /* * Called once the page refcount reaches 0. The reference count will be * reset to one by the core code after the method is called to prepare * for handing out the page again. */ void (*page_free)(struct page *page); /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); /* * Handle a memory failure that happens on a range of pfns. Notify the * processes that are using these pfns, and try to recover the data on * them if necessary. The mf_flags is passed through the whole notify * routine to the recovery function. * * When this is not implemented, or it returns -EOPNOTSUPP, the caller * will fall back to a common handler called mf_generic_kill_procs(). */ int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn, unsigned long nr_pages, int mf_flags); }; #define PGMAP_ALTMAP_VALID (1 << 0) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify detailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order. * A zero value (default) uses base pages as the vmemmap metadata * representation. A bigger value will set up compound struct pages * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed.
* @nr_range: number of ranges to be mapped * @range: range to be mapped when nr_range == 1 * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; struct percpu_ref ref; struct completion done; enum memory_type type; unsigned int flags; unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; union { struct range range; DECLARE_FLEX_ARRAY(struct range, ranges); }; }; static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap) { return pgmap->ops && pgmap->ops->memory_failure; } static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) return &pgmap->altmap; return NULL; } static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) { return 1 << pgmap->vmemmap_shift; } static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) { return is_device_private_page(&folio->page); } static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) { return is_device_coherent_page(&folio->page); } static inline bool is_fsdax_page(const struct page *page) { return is_zone_device_page(page) && page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; } static inline bool folio_is_fsdax(const struct folio *folio) { return is_fsdax_page(&folio->page); } #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without * ZONE_DEVICE support enabled, this requires callers to fall * back to plain devm_memremap() based on config */ WARN_ON_ONCE(1); return ERR_PTR(-ENXIO); } static inline void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) { } static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { return NULL; } static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) { return false; } /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) percpu_ref_put(&pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */
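/*
 * Editor's usage sketch, not part of the original file: the minimum a
 * driver fills in before calling devm_memremap_pages() above for a
 * MEMORY_DEVICE_GENERIC mapping. The pgmap must live as long as the
 * mapping; the device and the physical range here are hypothetical.
 */
static void *example_map_device_memory(struct device *dev,
				       struct dev_pagemap *pgmap,
				       phys_addr_t base, size_t size)
{
	pgmap->type = MEMORY_DEVICE_GENERIC;
	pgmap->range.start = base;
	pgmap->range.end = base + size - 1;
	pgmap->nr_range = 1;

	/* Returns a kernel virtual address, or an ERR_PTR() on failure. */
	return devm_memremap_pages(dev, pgmap);
}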
// SPDX-License-Identifier: GPL-2.0 #include <linux/irq_work.h> #include <linux/spinlock.h> #include <linux/task_work.h> #include <linux/resume_user_mode.h> static struct callback_head work_exited; /* all we need is ->next == NULL */ #ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); #endif /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback * @work: the callback to run * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently * running in the kernel or userspace. * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a * reschedule IPI to force the targeted task to reschedule and run task_work. * This can be advantageous if there's no strict requirement that the * task_work be run as soon as possible, just whenever the task enters the * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of * the aforementioned transitions, or exits. * * If the targeted task is exiting, then an error is returned and the work item * is not queued. It's up to the caller to arrange for an alternative mechanism * in that case. * * Note: there is no ordering guarantee on works queued here. The task_work * list is LIFO. * * RETURNS: * 0 on success, or -ESRCH.
*/ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; } else { kasan_record_aux_stack(work); } head = READ_ONCE(task->task_works); do { if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; } while (!try_cmpxchg(&task->task_works, &head, work)); switch (notify) { case TWA_NONE: break; case TWA_RESUME: set_notify_resume(task); break; case TWA_SIGNAL: set_notify_signal(task); break; case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; #ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; #endif default: WARN_ON_ONCE(1); break; } return 0; } /** * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @match: match function to call * @data: data to be passed in to match function * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data) { struct callback_head **pprev = &task->task_works; struct callback_head *work; unsigned long flags; if (likely(!task_work_pending(task))) return NULL; /* * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the * new entry before this work, we will find it again. Or * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); work = READ_ONCE(*pprev); while (work) { if (!match(work, data)) { pprev = &work->next; work = READ_ONCE(*pprev); } else if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); return work; } static bool task_work_func_match(struct callback_head *cb, void *data) { return cb->func == data; } /** * task_work_cancel_func - cancel a pending work matching a function added by task_work_add() * @task: the task which should execute the func's work * @func: identifies the func to match with a work to remove * * Find the last queued pending work with ->func == @func and remove * it from queue. * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_func(struct task_struct *task, task_work_func_t func) { return task_work_cancel_match(task, task_work_func_match, func); } static bool task_work_match(struct callback_head *cb, void *data) { return cb == data; } /** * task_work_cancel - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @cb: the callback to remove if queued * * Remove a callback from a task's queue if queued. * * RETURNS: * True if the callback was queued and got cancelled, false otherwise. */ bool task_work_cancel(struct task_struct *task, struct callback_head *cb) { struct callback_head *ret; ret = task_work_cancel_match(task, task_work_match, cb); return ret == cb; } /** * task_work_run - execute the works added by task_work_add() * * Flush the pending works. Should be used by the core kernel code. * Called before the task returns to the user-mode or stops, or when * it exits. In the latter case task_work_add() can no longer add the * new work after task_work_run() returns. 
*/ void task_work_run(void) { struct task_struct *task = current; struct callback_head *work, *head, *next; for (;;) { /* * work->func() can do task_work_add(), do not set * work_exited unless the list is empty. */ work = READ_ONCE(task->task_works); do { head = NULL; if (!work) { if (task->flags & PF_EXITING) head = &work_exited; else break; } } while (!try_cmpxchg(&task->task_works, &work, head)); if (!work) break; /* * Synchronize with task_work_cancel_match(). It can not remove * the first entry == work, cmpxchg(task_works) must fail. * But it can remove another entry from the ->next list. */ raw_spin_lock_irq(&task->pi_lock); raw_spin_unlock_irq(&task->pi_lock); do { next = work->next; work->func(work); work = next; cond_resched(); } while (work); } }
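/*
 * Usage sketch (added for illustration, not part of task_work.c): how a
 * hypothetical caller could defer work to a task's next return to user
 * mode via task_work_add(). defer_to_task() and my_func() are invented
 * names; error handling follows the -ESRCH contract documented above.
 */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

static void my_func(struct callback_head *cb)
{
	/* Runs in the target task's context, in process context. */
	kfree(cb);
}

static int defer_to_task(struct task_struct *task)
{
	struct callback_head *cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return -ENOMEM;

	init_task_work(cb, my_func);
	/* TWA_RESUME: run the callback on the next exit to user mode. */
	if (task_work_add(task, cb, TWA_RESUME)) {
		kfree(cb);		/* target task is exiting: -ESRCH */
		return -ESRCH;
	}
	return 0;
}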
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 *
 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
 * 2000-2002 x86-64 support by Andi Kleen
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/kstrtox.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/context_tracking.h>
#include <linux/entry-common.h>
#include <linux/syscalls.h>
#include <linux/rseq.h>

#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xstate.h>
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
#include <asm/vm86.h>
#include <asm/syscall.h>
#include <asm/sigframe.h>
#include <asm/signal.h>
#include <asm/shstk.h>

static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
	return IS_ENABLED(CONFIG_IA32_EMULATION) &&
		ksig->ka.sa.sa_flags & SA_IA32_ABI;
}

static inline int is_ia32_frame(struct ksignal *ksig)
{
	return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
}

static inline int is_x32_frame(struct ksignal *ksig)
{
	return IS_ENABLED(CONFIG_X86_X32_ABI) &&
		ksig->ka.sa.sa_flags & SA_X32_ABI;
}

/*
 * Enable all pkeys temporarily, so as to ensure that both the current
 * execution stack as well as the alternate signal stack are writeable.
* The application can use any of the available pkeys to protect the * alternate signal stack, and we don't know which one it is, so enable * all. The PKRU register will be reset to init_pkru later in the flow, * in fpu__clear_user_states(), and it is the application's responsibility * to enable the appropriate pkey as the first step in the signal handler * so that the handler does not segfault. */ static inline u32 sig_prepare_pkru(void) { u32 orig_pkru = read_pkru(); write_pkru(0); return orig_pkru; } /* * Set up a signal frame. */ /* x86 ABI requires 16-byte alignment */ #define FRAME_ALIGNMENT 16UL #define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1) /* * Determine which stack to use.. */ void __user * get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { struct k_sigaction *ka = &ksig->ka; int ia32_frame = is_ia32_frame(ksig); /* Default to using normal stack */ bool nested_altstack = on_sig_stack(regs->sp); bool entering_altstack = false; unsigned long math_size = 0; unsigned long sp = regs->sp; unsigned long buf_fx = 0; u32 pkru; /* redzone */ if (!ia32_frame) sp -= 128; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { /* * This checks nested_altstack via sas_ss_flags(). Sensible * programs use SS_AUTODISARM, which disables that check, and * programs that don't use SS_AUTODISARM get compatible. */ if (sas_ss_flags(sp) == 0) { sp = current->sas_ss_sp + current->sas_ss_size; entering_altstack = true; } } else if (ia32_frame && !nested_altstack && regs->ss != __USER_DS && !(ka->sa.sa_flags & SA_RESTORER) && ka->sa.sa_restorer) { /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; entering_altstack = true; } sp = fpu__alloc_mathframe(sp, ia32_frame, &buf_fx, &math_size); *fpstate = (void __user *)sp; sp -= frame_size; if (ia32_frame) /* * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4; else sp = round_down(sp, FRAME_ALIGNMENT) - 8; /* * If we are on the alternate signal stack and would overflow it, don't. * Return an always-bogus address instead so we will die with SIGSEGV. */ if (unlikely((nested_altstack || entering_altstack) && !__on_sig_stack(sp))) { if (show_unhandled_signals && printk_ratelimit()) pr_info("%s[%d] overflowed sigaltstack\n", current->comm, task_pid_nr(current)); return (void __user *)-1L; } /* Update PKRU to enable access to the alternate signal stack. */ pkru = sig_prepare_pkru(); /* save i387 and extended state */ if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size, pkru)) { /* * Restore PKRU to the original, user-defined value; disable * extra pkeys enabled for the alternate signal stack, if any. */ write_pkru(pkru); return (void __user *)-1L; } return (void __user *)sp; } /* * There are four different struct types for signal frame: sigframe_ia32, * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case * -- the largest size. It means the size for 64-bit apps is a bit more * than needed, but this keeps the code simple. */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) # define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct sigframe_ia32) #else # define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct rt_sigframe) #endif /* * The FP state frame contains an XSAVE buffer which must be 64-byte aligned. * If a signal frame starts at an unaligned address, extra space is required. 
* This is the max alignment padding, conservatively. */ #define MAX_XSAVE_PADDING 63UL /* * The frame data is composed of the following areas and laid out as: * * ------------------------- * | alignment padding | * ------------------------- * | (f)xsave frame | * ------------------------- * | fsave header | * ------------------------- * | alignment padding | * ------------------------- * | siginfo + ucontext | * ------------------------- */ /* max_frame_size tells userspace the worst case signal stack size. */ static unsigned long __ro_after_init max_frame_size; static unsigned int __ro_after_init fpu_default_state_size; static int __init init_sigframe_size(void) { fpu_default_state_size = fpu__get_fpstate_size(); max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING; max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING; /* Userspace expects an aligned size. */ max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); pr_info("max sigframe size: %lu\n", max_frame_size); return 0; } early_initcall(init_sigframe_size); unsigned long get_sigframe_size(void) { return max_frame_size; } static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { /* Perform fixup for the pre-signal frame. */ rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) return ia32_setup_rt_frame(ksig, regs); else return ia32_setup_frame(ksig, regs); } else if (is_x32_frame(ksig)) { return x32_setup_rt_frame(ksig, regs); } else { return x64_setup_rt_frame(ksig, regs); } } static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { bool stepping, failed; struct fpu *fpu = x86_task_fpu(current); if (v8086_mode(regs)) save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); /* Are we from a system call? */ if (syscall_get_nr(current, regs) != -1) { /* If so, check system call restarting.. */ switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; break; case -ERESTARTSYS: if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { regs->ax = -EINTR; break; } fallthrough; case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; break; } } /* * If TF is set due to a debugger (TIF_FORCED_TF), clear TF now * so that register information in the sigcontext is correct and * then notify the tracer before entering the signal handler. */ stepping = test_thread_flag(TIF_SINGLESTEP); if (stepping) user_disable_single_step(current); failed = (setup_rt_frame(ksig, regs) < 0); if (!failed) { /* * Clear the direction flag as per the ABI for function entry. * * Clear RF when entering the signal handler, because * it might disable possible debug exception from the * signal handler. * * Clear TF for the case when it wasn't set by debugger to * avoid the recursive send_sigtrap() in SIGTRAP handler. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); /* * Ensure the signal handler starts with the new fpu state. */ fpu__clear_user_states(fpu); } signal_setup_done(failed, ksig, stepping); } static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { #ifdef CONFIG_IA32_EMULATION if (current->restart_block.arch_data & TS_COMPAT) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); #else return __NR_restart_syscall; #endif } /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. 
Thus you cannot kill init even with a SIGKILL even by * mistake. */ void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; } /* Did we come from a system call? */ if (syscall_get_nr(current, regs) != -1) { /* Restart the system call - no handlers present */ switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; break; case -ERESTART_RESTARTBLOCK: regs->ax = get_nr_restart_syscall(regs); regs->ip -= 2; break; } } /* * If there's no signal to deliver, we just put the saved sigmask * back. */ restore_saved_sigmask(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s" "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, me->comm, me->pid, where, frame, regs->ip, regs->sp, regs->orig_ax); print_vma_addr(KERN_CONT " in ", regs->ip); pr_cont("\n"); } force_sig(SIGSEGV); } #ifdef CONFIG_DYNAMIC_SIGFRAME #ifdef CONFIG_STRICT_SIGALTSTACK_SIZE static bool strict_sigaltstack_size __ro_after_init = true; #else static bool strict_sigaltstack_size __ro_after_init = false; #endif static int __init strict_sas_size(char *arg) { return kstrtobool(arg, &strict_sigaltstack_size) == 0; } __setup("strict_sas_size", strict_sas_size); /* * MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512 * exceeds that size already. As such programs might never use the * sigaltstack they just continued to work. While always checking against * the real size would be correct, this might be considered a regression. * * Therefore avoid the sanity check, unless enforced by kernel * configuration or command line option. * * When dynamic FPU features are supported, the check is also enforced when * the task has permissions to use dynamic features. Tasks which have no * permission are checked against the size of the non-dynamic feature set * if strict checking is enabled. This avoids forcing all tasks on the * system to allocate large sigaltstacks even if they are never going * to use a dynamic feature. As this is serialized via sighand::siglock * any permission request for a dynamic feature either happened already * or will see the newly install sigaltstack size in the permission checks. */ bool sigaltstack_size_valid(size_t ss_size) { unsigned long fsize = max_frame_size - fpu_default_state_size; u64 mask; lockdep_assert_held(&current->sighand->siglock); if (!fpu_state_size_dynamic() && !strict_sigaltstack_size) return true; fsize += x86_task_fpu(current->group_leader)->perm.__user_state_size; if (likely(ss_size > fsize)) return true; if (strict_sigaltstack_size) return ss_size > fsize; mask = x86_task_fpu(current->group_leader)->perm.__state_perm; if (mask & XFEATURE_MASK_USER_DYNAMIC) return ss_size > fsize; return true; } #endif /* CONFIG_DYNAMIC_SIGFRAME */
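/*
 * Userspace-side sketch (illustrative, not part of signal.c): sizing a
 * sigaltstack against the kernel's worst-case signal frame. Assumes a
 * glibc and kernel that expose max_frame_size through
 * getauxval(AT_MINSIGSTKSZ); setup_altstack() is an invented name.
 */
#include <signal.h>
#include <stdlib.h>
#include <sys/auxv.h>

static int setup_altstack(void)
{
	stack_t ss;
	size_t min = getauxval(AT_MINSIGSTKSZ);

	if (!min)
		min = MINSIGSTKSZ;	/* kernel without AT_MINSIGSTKSZ */

	ss.ss_size = min + SIGSTKSZ;	/* headroom beyond the frame itself */
	ss.ss_sp = malloc(ss.ss_size);
	if (!ss.ss_sp)
		return -1;
	ss.ss_flags = 0;

	return sigaltstack(&ss, NULL);
}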
// SPDX-License-Identifier: GPL-2.0
/*
 * kernel userspace event delivery
 *
 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
 * Copyright (C) 2004 Novell, Inc. All rights reserved.
* Copyright (C) 2004 IBM, Inc. All rights reserved. * * Authors: * Robert Love <rml@novell.com> * Kay Sievers <kay.sievers@vrfy.org> * Arjan van de Ven <arjanv@redhat.com> * Greg Kroah-Hartman <greg@kroah.com> */ #include <linux/spinlock.h> #include <linux/string.h> #include <linux/kobject.h> #include <linux/export.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/uidgid.h> #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> atomic64_t uevent_seqnum; #ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; #endif struct uevent_sock { struct list_head list; struct sock *sk; }; #ifdef CONFIG_NET static LIST_HEAD(uevent_sock_list); /* This lock protects uevent_sock_list */ static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ static const char *kobject_actions[] = { [KOBJ_ADD] = "add", [KOBJ_REMOVE] = "remove", [KOBJ_CHANGE] = "change", [KOBJ_MOVE] = "move", [KOBJ_ONLINE] = "online", [KOBJ_OFFLINE] = "offline", [KOBJ_BIND] = "bind", [KOBJ_UNBIND] = "unbind", }; static int kobject_action_type(const char *buf, size_t count, enum kobject_action *type, const char **args) { enum kobject_action action; size_t count_first; const char *args_start; int ret = -EINVAL; if (count && (buf[count-1] == '\n' || buf[count-1] == '\0')) count--; if (!count) goto out; args_start = strnchr(buf, count, ' '); if (args_start) { count_first = args_start - buf; args_start = args_start + 1; } else count_first = count; for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) { if (strncmp(kobject_actions[action], buf, count_first) != 0) continue; if (kobject_actions[action][count_first] != '\0') continue; if (args) *args = args_start; *type = action; ret = 0; break; } out: return ret; } static const char *action_arg_word_end(const char *buf, const char *buf_end, char delim) { const char *next = buf; while (next <= buf_end && *next != delim) if (!isalnum(*next++)) return NULL; if (next == buf) return NULL; return next; } static int kobject_action_args(const char *buf, size_t count, struct kobj_uevent_env **ret_env) { struct kobj_uevent_env *env = NULL; const char *next, *buf_end, *key; int key_len; int r = -EINVAL; if (count && (buf[count - 1] == '\n' || buf[count - 1] == '\0')) count--; if (!count) return -EINVAL; env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) return -ENOMEM; /* first arg is UUID */ if (count < UUID_STRING_LEN || !uuid_is_valid(buf) || add_uevent_var(env, "SYNTH_UUID=%.*s", UUID_STRING_LEN, buf)) goto out; /* * the rest are custom environment variables in KEY=VALUE * format with ' ' delimiter between each KEY=VALUE pair */ next = buf + UUID_STRING_LEN; buf_end = buf + count - 1; while (next <= buf_end) { if (*next != ' ') goto out; /* skip the ' ', key must follow */ key = ++next; if (key > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, '='); if (!next || next > buf_end || *next != '=') goto out; key_len = next - buf; /* skip the '=', value must follow */ if (++next > buf_end) goto out; buf = next; next = action_arg_word_end(buf, buf_end, ' '); if (!next) goto out; if (add_uevent_var(env, "SYNTH_ARG_%.*s=%.*s", key_len, key, (int) (next - buf), buf)) goto out; } r = 0; out: if (r) kfree(env); else *ret_env = env; return r; } /** * kobject_synth_uevent - send synthetic uevent with arguments * * 
@kobj: struct kobject for which synthetic uevent is to be generated * @buf: buffer containing action type and action args, newline is ignored * @count: length of buffer * * Returns 0 if kobject_synthetic_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) { char *no_uuid_envp[] = { "SYNTH_UUID=0", NULL }; enum kobject_action action; const char *action_args; struct kobj_uevent_env *env; const char *msg = NULL, *devpath; int r; r = kobject_action_type(buf, count, &action, &action_args); if (r) { msg = "unknown uevent action string"; goto out; } if (!action_args) { r = kobject_uevent_env(kobj, action, no_uuid_envp); goto out; } r = kobject_action_args(action_args, count - (action_args - buf), &env); if (r == -EINVAL) { msg = "incorrect uevent action arguments"; goto out; } if (r) goto out; r = kobject_uevent_env(kobj, action, env->envp); kfree(env); out: if (r) { devpath = kobject_get_path(kobj, GFP_KERNEL); pr_warn("synth uevent: %s: %s\n", devpath ?: "unknown device", msg ?: "failed to send uevent"); kfree(devpath); } return r; } #ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); if (ops) { const void *init_ns, *ns; ns = kobj->ktype->namespace(kobj); init_ns = ops->initial_ns(); return ns != init_ns; } return 0; } static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) { int buffer_size = sizeof(env->buf) - env->buflen; int len; len = strscpy(&env->buf[env->buflen], subsystem, buffer_size); if (len < 0) { pr_warn("%s: insufficient buffer space (%u left) for %s\n", __func__, buffer_size, subsystem); return -ENOMEM; } env->argv[0] = uevent_helper; env->argv[1] = &env->buf[env->buflen]; env->argv[2] = NULL; env->buflen += len + 1; return 0; } static void cleanup_uevent_env(struct subprocess_info *info) { kfree(info->data); } #endif #ifdef CONFIG_NET static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct netlink_skb_parms *parms; struct sk_buff *skb = NULL; char *scratch; size_t len; /* allocate message with maximum possible size */ len = strlen(action_string) + strlen(devpath) + 2; skb = alloc_skb(len + env->buflen, GFP_KERNEL); if (!skb) return NULL; /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); skb_put_data(skb, env->buf, env->buflen); parms = &NETLINK_CB(skb); parms->creds.uid = GLOBAL_ROOT_UID; parms->creds.gid = GLOBAL_ROOT_GID; parms->dst_group = 1; parms->portid = 0; return skb; } static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; int retval = 0; /* send netlink message */ mutex_lock(&uevent_sock_mutex); list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; if (!netlink_has_listeners(uevent_sock, 1)) continue; if (!skb) { retval = -ENOMEM; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) continue; } retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS || retval == -ESRCH) retval = 0; } mutex_unlock(&uevent_sock_mutex); consume_skb(skb); return retval; } static int uevent_net_broadcast_tagged(struct sock *usk, struct kobj_uevent_env *env, const char *action_string, const char 
*devpath) { struct user_namespace *owning_user_ns = sock_net(usk)->user_ns; struct sk_buff *skb = NULL; int ret = 0; skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) return -ENOMEM; /* fix credentials */ if (owning_user_ns != &init_user_ns) { struct netlink_skb_parms *parms = &NETLINK_CB(skb); kuid_t root_uid; kgid_t root_gid; /* fix uid */ root_uid = make_kuid(owning_user_ns, 0); if (uid_valid(root_uid)) parms->creds.uid = root_uid; /* fix gid */ root_gid = make_kgid(owning_user_ns, 0); if (gid_valid(root_gid)) parms->creds.gid = root_gid; } ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } #endif static int kobject_uevent_net_broadcast(struct kobject *kobj, struct kobj_uevent_env *env, const char *action_string, const char *devpath) { int ret = 0; #ifdef CONFIG_NET const struct kobj_ns_type_operations *ops; const struct net *net = NULL; ops = kobj_ns_ops(kobj); if (!ops && kobj->kset) { struct kobject *ksobj = &kobj->kset->kobj; if (ksobj->parent != NULL) ops = kobj_ns_ops(ksobj->parent); } /* kobjects currently only carry network namespace tags and they * are the only tag relevant here since we want to decide which * network namespaces to broadcast the uevent into. */ if (ops && ops->netlink_ns && kobj->ktype->namespace) if (ops->type == KOBJ_NS_TYPE_NET) net = kobj->ktype->namespace(kobj); if (!net) ret = uevent_net_broadcast_untagged(env, action_string, devpath); else ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, action_string, devpath); #endif return ret; } static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; size_t len; int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, sizeof(modalias_prefix) - 1)) { i++; continue; } len = strlen(env->envp[i]) + 1; if (i != env->envp_idx - 1) { /* @env->envp[] contains pointers to @env->buf[] * with @env->buflen chars, and we are removing * variable MODALIAS here pointed by @env->envp[i] * with length @len as shown below: * * 0 @env->buf[] @env->buflen * --------------------------------------------- * ^ ^ ^ ^ * | |-> @len <-| target block | * @env->envp[0] @env->envp[i] @env->envp[i + 1] * * so the "target block" indicated above is moved * backward by @len, and its right size is * @env->buflen - (@env->envp[i + 1] - @env->envp[0]). */ memmove(env->envp[i], env->envp[i + 1], env->buflen - (env->envp[i + 1] - env->envp[0])); for (j = i; j < env->envp_idx - 1; j++) env->envp[j] = env->envp[j + 1] - len; } env->envp_idx--; env->buflen -= len; } } /** * kobject_uevent_env - send an uevent with environmental data * * @kobj: struct kobject that the action is happening to * @action: action that is happening * @envp_ext: pointer to environmental data * * Returns 0 if kobject_uevent_env() is completed with success or the * corresponding error when it fails. */ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp_ext[]) { struct kobj_uevent_env *env; const char *action_string = kobject_actions[action]; const char *devpath = NULL; const char *subsystem; struct kobject *top_kobj; struct kset *kset; const struct kset_uevent_ops *uevent_ops; int i = 0; int retval = 0; /* * Mark "remove" event done regardless of result, for some subsystems * do not want to re-trigger "remove" event via automatic cleanup. 
 */
	if (action == KOBJ_REMOVE)
		kobj->state_remove_uevent_sent = 1;

	pr_debug("kobject: '%s' (%p): %s\n",
		 kobject_name(kobj), kobj, __func__);

	/* search the kset we belong to */
	top_kobj = kobj;
	while (!top_kobj->kset && top_kobj->parent)
		top_kobj = top_kobj->parent;

	if (!top_kobj->kset) {
		pr_debug("kobject: '%s' (%p): %s: attempted to send uevent "
			 "without kset!\n", kobject_name(kobj), kobj,
			 __func__);
		return -EINVAL;
	}

	kset = top_kobj->kset;
	uevent_ops = kset->uevent_ops;

	/* skip the event, if uevent_suppress is set */
	if (kobj->uevent_suppress) {
		pr_debug("kobject: '%s' (%p): %s: uevent_suppress "
			 "caused the event to drop!\n",
			 kobject_name(kobj), kobj, __func__);
		return 0;
	}

	/* skip the event, if the filter returns zero. */
	if (uevent_ops && uevent_ops->filter)
		if (!uevent_ops->filter(kobj)) {
			pr_debug("kobject: '%s' (%p): %s: filter function "
				 "caused the event to drop!\n",
				 kobject_name(kobj), kobj, __func__);
			return 0;
		}

	/* originating subsystem */
	if (uevent_ops && uevent_ops->name)
		subsystem = uevent_ops->name(kobj);
	else
		subsystem = kobject_name(&kset->kobj);

	if (!subsystem) {
		pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the "
			 "event to drop!\n", kobject_name(kobj), kobj,
			 __func__);
		return 0;
	}

	/* environment buffer */
	env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;

	/* complete object path */
	devpath = kobject_get_path(kobj, GFP_KERNEL);
	if (!devpath) {
		retval = -ENOENT;
		goto exit;
	}

	/* default keys */
	retval = add_uevent_var(env, "ACTION=%s", action_string);
	if (retval)
		goto exit;
	retval = add_uevent_var(env, "DEVPATH=%s", devpath);
	if (retval)
		goto exit;
	retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem);
	if (retval)
		goto exit;

	/* keys passed in from the caller */
	if (envp_ext) {
		for (i = 0; envp_ext[i]; i++) {
			retval = add_uevent_var(env, "%s", envp_ext[i]);
			if (retval)
				goto exit;
		}
	}

	/* let the kset specific function add its stuff */
	if (uevent_ops && uevent_ops->uevent) {
		retval = uevent_ops->uevent(kobj, env);
		if (retval) {
			pr_debug("kobject: '%s' (%p): %s: uevent() returned "
				 "%d\n", kobject_name(kobj), kobj,
				 __func__, retval);
			goto exit;
		}
	}

	switch (action) {
	case KOBJ_ADD:
		/*
		 * Mark "add" event so we can make sure we deliver "remove"
		 * event to userspace during automatic cleanup. If
		 * the object did send an "add" event, "remove" will
		 * be automatically generated by the core, if not already
		 * done by the caller.
*/ kobj->state_add_uevent_sent = 1; break; case KOBJ_UNBIND: zap_modalias_env(env); break; default: break; } /* we will send an event, so request a new sequence number */ retval = add_uevent_var(env, "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (retval) goto exit; retval = kobject_uevent_net_broadcast(kobj, env, action_string, devpath); #ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { struct subprocess_info *info; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; retval = add_uevent_var(env, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; retval = init_uevent_argv(env, subsystem); if (retval) goto exit; retval = -ENOMEM; info = call_usermodehelper_setup(env->argv[0], env->argv, env->envp, GFP_KERNEL, NULL, cleanup_uevent_env, env); if (info) { retval = call_usermodehelper_exec(info, UMH_NO_WAIT); env = NULL; /* freed by cleanup_uevent_env */ } } #endif exit: kfree(devpath); kfree(env); return retval; } EXPORT_SYMBOL_GPL(kobject_uevent_env); /** * kobject_uevent - notify userspace by sending an uevent * * @kobj: struct kobject that the action is happening to * @action: action that is happening * * Returns 0 if kobject_uevent() is completed with success or the * corresponding error when it fails. */ int kobject_uevent(struct kobject *kobj, enum kobject_action action) { return kobject_uevent_env(kobj, action, NULL); } EXPORT_SYMBOL_GPL(kobject_uevent); /** * add_uevent_var - add key value string to the environment buffer * @env: environment buffer structure * @format: printf format for the key=value pair * * Returns 0 if environment variable was added successfully or -ENOMEM * if no space was available. */ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) 
{ va_list args; int len; if (env->envp_idx >= ARRAY_SIZE(env->envp)) { WARN(1, KERN_ERR "add_uevent_var: too many keys\n"); return -ENOMEM; } va_start(args, format); len = vsnprintf(&env->buf[env->buflen], sizeof(env->buf) - env->buflen, format, args); va_end(args); if (len >= (sizeof(env->buf) - env->buflen)) { WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n"); return -ENOMEM; } env->envp[env->envp_idx++] = &env->buf[env->buflen]; env->buflen += len + 1; return 0; } EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, struct netlink_ext_ack *extack) { /* u64 to chars: 2^64 - 1 = 21 chars */ char buf[sizeof("SEQNUM=") + 21]; struct sk_buff *skbc; int ret; /* bump and prepare sequence number */ ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", atomic64_inc_return(&uevent_seqnum)); if (ret < 0 || (size_t)ret >= sizeof(buf)) return -ENOMEM; ret++; /* verify message does not overflow */ if ((skb->len + ret) > UEVENT_BUFFER_SIZE) { NL_SET_ERR_MSG(extack, "uevent message too big"); return -EINVAL; } /* copy skb and extend to accommodate sequence number */ skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL); if (!skbc) return -ENOMEM; /* append sequence number */ skb_put_data(skbc, buf, ret); /* remove msg header */ skb_pull(skbc, NLMSG_HDRLEN); /* set portid 0 to inform userspace message comes from kernel */ NETLINK_CB(skbc).portid = 0; NETLINK_CB(skbc).dst_group = 1; ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (ret == -ENOBUFS || ret == -ESRCH) ret = 0; return ret; } static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net; int ret; if (!nlmsg_data(nlh)) return -EINVAL; /* * Verify that we are allowed to send messages to the target * network namespace. The caller must have CAP_SYS_ADMIN in the * owning user namespace of the target network namespace. */ net = sock_net(NETLINK_CB(skb).sk); if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) { NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability"); return -EPERM; } ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack); return ret; } static void uevent_net_rcv(struct sk_buff *skb) { netlink_rcv_skb(skb, &uevent_net_rcv_skb); } static int uevent_net_init(struct net *net) { struct uevent_sock *ue_sk; struct netlink_kernel_cfg cfg = { .groups = 1, .input = uevent_net_rcv, .flags = NL_CFG_F_NONROOT_RECV }; ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); if (!ue_sk) return -ENOMEM; ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { pr_err("kobject_uevent: unable to create netlink socket!\n"); kfree(ue_sk); return -ENODEV; } net->uevent_sock = ue_sk; /* Restrict uevents to initial user namespace. */ if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_add_tail(&ue_sk->list, &uevent_sock_list); mutex_unlock(&uevent_sock_mutex); } return 0; } static void uevent_net_exit(struct net *net) { struct uevent_sock *ue_sk = net->uevent_sock; if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { mutex_lock(&uevent_sock_mutex); list_del(&ue_sk->list); mutex_unlock(&uevent_sock_mutex); } netlink_kernel_release(ue_sk->sk); kfree(ue_sk); } static struct pernet_operations uevent_net_ops = { .init = uevent_net_init, .exit = uevent_net_exit, }; static int __init kobject_uevent_init(void) { return register_pernet_subsys(&uevent_net_ops); } postcore_initcall(kobject_uevent_init); #endif
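/*
 * Usage sketch (hypothetical driver code, not part of this file): emitting
 * a "change" uevent with one extra environment variable through
 * kobject_uevent_env(). notify_state_change() and MY_STATE are invented
 * names; any device embedding a kobject could be used the same way.
 */
#include <linux/device.h>
#include <linux/kobject.h>

static int notify_state_change(struct device *dev, const char *state)
{
	char var[32];
	char *envp[] = { var, NULL };

	snprintf(var, sizeof(var), "MY_STATE=%s", state);
	/* Shows up as MY_STATE=... in the netlink message and helper env. */
	return kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
}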
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Pluggable TCP congestion control support and newReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
#include <trace/events/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

/* Simple linear search, don't expect many entries! */
struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
	struct tcp_congestion_ops *e;

	list_for_each_entry_rcu(e, &tcp_cong_list, list) {
		if (strcmp(e->name, name) == 0)
			return e;
	}

	return NULL;
}

void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	trace_tcp_cong_state_set(sk, ca_state);

	if (icsk->icsk_ca_ops->set_state)
		icsk->icsk_ca_ops->set_state(sk, ca_state);
	icsk->icsk_ca_state = ca_state;
}

/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name)
{
	struct tcp_congestion_ops *ca = tcp_ca_find(name);

#ifdef CONFIG_MODULES
	if (!ca && capable(CAP_NET_ADMIN)) {
		rcu_read_unlock();
		request_module("tcp_%s", name);
		rcu_read_lock();
		ca = tcp_ca_find(name);
	}
#endif
	return ca;
}

/* Simple linear search, not much in here.
*/ struct tcp_congestion_ops *tcp_ca_find_key(u32 key) { struct tcp_congestion_ops *e; list_for_each_entry_rcu(e, &tcp_cong_list, list) { if (e->key == key) return e; } return NULL; } int tcp_validate_congestion_control(struct tcp_congestion_ops *ca) { /* all algorithms must implement these */ if (!ca->ssthresh || !ca->undo_cwnd || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } return 0; } /* Attach new congestion control algorithm to the list * of available options. */ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret; ret = tcp_validate_congestion_control(ca); if (ret) return ret; ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { pr_notice("%s already registered or non-unique key\n", ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); pr_debug("%s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); return ret; } EXPORT_SYMBOL_GPL(tcp_register_congestion_control); /* * Remove congestion control algorithm, called from * the module's remove function. Module ref counts are used * to ensure that this can't be done till all sockets using * that method are closed. */ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) { spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); /* Wait for outstanding readers to complete before the * module gets removed entirely. * * A try_module_get() should fail by now as our module is * in "going" state since no refs are held anymore and * module_exit() handler being called. */ synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Replace a registered old ca with a new one. * * The new ca must have the same name as the old one, that has been * registered. */ int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) { struct tcp_congestion_ops *existing; int ret = 0; ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); existing = tcp_ca_find_key(old_ca->key); if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) { pr_notice("%s not registered or non-unique key\n", ca->name); ret = -EINVAL; } else if (existing != old_ca) { pr_notice("invalid old congestion control algorithm to replace\n"); ret = -EINVAL; } else { /* Add the new one before removing the old one to keep * one implementation available all the time. */ list_add_tail_rcu(&ca->list, &tcp_cong_list); list_del_rcu(&existing->list); pr_debug("%s updated\n", ca->name); } spin_unlock(&tcp_cong_list_lock); /* Wait for outstanding readers to complete before the * module or struct_ops gets removed entirely. */ if (!ret) synchronize_rcu(); return ret; } u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; might_sleep(); rcu_read_lock(); ca = tcp_ca_find_autoload(name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; } rcu_read_unlock(); return key; } char *tcp_ca_get_name_by_key(u32 key, char *buffer) { const struct tcp_congestion_ops *ca; char *ret = NULL; rcu_read_lock(); ca = tcp_ca_find_key(key); if (ca) { strscpy(buffer, ca->name, TCP_CA_NAME_MAX); ret = buffer; } rcu_read_unlock(); return ret; } /* Assign choice of congestion control. 
*/ void tcp_assign_congestion_control(struct sock *sk) { struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; rcu_read_lock(); ca = rcu_dereference(net->ipv4.tcp_congestion_control); if (unlikely(!bpf_try_module_get(ca, ca->owner))) ca = &tcp_reno; icsk->icsk_ca_ops = ca; rcu_read_unlock(); memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); } void tcp_init_congestion_control(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); icsk->icsk_ca_initialized = 1; } static void tcp_reinit_congestion_control(struct sock *sk, const struct tcp_congestion_ops *ca) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } /* Manage refcounts on socket close. */ void tcp_cleanup_congestion_control(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); icsk->icsk_ca_initialized = 0; bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ int tcp_set_default_congestion_control(struct net *net, const char *name) { struct tcp_congestion_ops *ca; const struct tcp_congestion_ops *prev; int ret; rcu_read_lock(); ca = tcp_ca_find_autoload(name); if (!ca) { ret = -ENOENT; } else if (!bpf_try_module_get(ca, ca->owner)) { ret = -EBUSY; } else if (!net_eq(net, &init_net) && !(ca->flags & TCP_CONG_NON_RESTRICTED)) { /* Only init netns can set default to a restricted algorithm */ ret = -EPERM; } else { prev = xchg(&net->ipv4.tcp_congestion_control, ca); if (prev) bpf_module_put(prev, prev->owner); ca->flags |= TCP_CONG_NON_RESTRICTED; ret = 0; } rcu_read_unlock(); return ret; } /* Set default value from kernel configuration at bootup */ static int __init tcp_congestion_default(void) { return tcp_set_default_congestion_control(&init_net, CONFIG_DEFAULT_TCP_CONG); } late_initcall(tcp_congestion_default); /* Build string with list of available congestion control values */ void tcp_get_available_congestion_control(char *buf, size_t maxlen) { struct tcp_congestion_ops *ca; size_t offs = 0; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? 
"" : " ", ca->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } /* Get current default congestion control */ void tcp_get_default_congestion_control(struct net *net, char *name) { const struct tcp_congestion_ops *ca; rcu_read_lock(); ca = rcu_dereference(net->ipv4.tcp_congestion_control); strscpy(name, ca->name, TCP_CA_NAME_MAX); rcu_read_unlock(); } /* Built list of non-restricted congestion control values */ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) { struct tcp_congestion_ops *ca; size_t offs = 0; *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } /* Change list of non-restricted congestion control */ int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; char *saved_clone, *clone, *name; int ret = 0; saved_clone = clone = kstrdup(val, GFP_USER); if (!clone) return -ENOMEM; spin_lock(&tcp_cong_list_lock); /* pass 1 check for bad entries */ while ((name = strsep(&clone, " ")) && *name) { ca = tcp_ca_find(name); if (!ca) { ret = -ENOENT; goto out; } } /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); kfree(saved_clone); return ret; } /* Change congestion control for socket. If load is false, then it is the * responsibility of the caller to call tcp_init_congestion_control or * tcp_reinit_congestion_control (if the current congestion control was * already initialized. */ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool cap_net_admin) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; int err = 0; if (icsk->icsk_ca_dst_locked) return -EPERM; rcu_read_lock(); if (!load) ca = tcp_ca_find(name); else ca = tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) err = -EPERM; else if (!bpf_try_module_get(ca, ca->owner)) err = -EBUSY; else tcp_reinit_congestion_control(sk, ca); out: rcu_read_unlock(); return err; } /* Slow start is used when congestion window is no greater than the slow start * threshold. We base on RFC2581 and also handle stretch ACKs properly. * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but * something better;) a packet is only considered (s)acked in its entirety to * defend the ACK attacks described in the RFC. Slow start processes a stretch * ACK of degree N as if N acks of degree 1 are received back to back except * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ __bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); acked -= cwnd - tcp_snd_cwnd(tp); tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), * for every packet that was ACKed. 
*/ __bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { /* If credits accumulated at a higher w, apply them gently now. */ if (tp->snd_cwnd_cnt >= w) { tp->snd_cwnd_cnt = 0; tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { u32 delta = tp->snd_cwnd_cnt / w; tp->snd_cwnd_cnt -= delta * w; tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta); } tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); /* * TCP Reno congestion control * This is special case used for fallback as well. */ /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ __bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); if (!tcp_is_cwnd_limited(sk)) return; /* In "safe" area, increase. */ if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } /* In dangerous area, increase slowly. */ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ __bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return max(tcp_snd_cwnd(tp) >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); __bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return max(tcp_snd_cwnd(tp), tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); struct tcp_congestion_ops tcp_reno = { .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = tcp_reno_undo_cwnd, };
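/*
 * Registration sketch (hypothetical module, not part of tcp_cong.c): the
 * minimum tcp_validate_congestion_control() accepts -- ssthresh,
 * undo_cwnd, and one of cong_avoid/cong_control -- here simply reusing
 * the Reno helpers above. "example" is an invented algorithm name.
 */
#include <linux/module.h>
#include <net/tcp.h>

static struct tcp_congestion_ops tcp_example __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "example",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,
};

static int __init tcp_example_register(void)
{
	/* Fails with -EEXIST if the name/key is already registered. */
	return tcp_register_congestion_control(&tcp_example);
}

static void __exit tcp_example_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_example);
}

module_init(tcp_example_register);
module_exit(tcp_example_unregister);
MODULE_LICENSE("GPL");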
// SPDX-License-Identifier: GPL-2.0
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include "minix.h"

enum {DEPTH = 3, DIRECT = 7};	/* Only double indirect */

typedef u16 block_t;	/* 16 bit, host order */

static inline unsigned long block_to_cpu(block_t n)
{
	return n;
}

static inline block_t cpu_to_block(unsigned long n)
{
	return n;
}

static inline block_t *i_data(struct inode *inode)
{
	return (block_t *)minix_i(inode)->u.i1_data;
}

static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
	int n = 0;

	if (block < 0) {
		printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n",
			block, inode->i_sb->s_bdev);
		return 0;
	}
	if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes)
		return 0;

	if (block < 7) {
		offsets[n++] = block;
	} else if ((block -= 7) < 512) {
		offsets[n++] = 7;
		offsets[n++] = block;
	} else {
		block -= 512;
		offsets[n++] = 8;
		offsets[n++] = block>>9;
		offsets[n++] = block & 511;
	}
	return n;
}

#include "itree_common.c"

int V1_minix_get_block(struct inode * inode, long block,
			struct buffer_head *bh_result, int create)
{
	return get_block(inode, block, bh_result, create);
}

void V1_minix_truncate(struct inode * inode)
{
	truncate(inode);
}

unsigned V1_minix_blocks(loff_t size, struct super_block *sb)
{
	return nblocks(size, sb);
}
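/*
 * Standalone sketch (userspace, illustrative only) of the V1 mapping
 * implemented by block_to_path() above: 7 direct slots, a 512-entry
 * indirect block at slot 7, then a double-indirect tree at slot 8.
 * E.g. file block 600 becomes 600 - 7 - 512 = 81, i.e. the path
 * { 8, 81 >> 9 = 0, 81 & 511 = 81 }.
 */
#include <stdio.h>

static int v1_block_to_path(long block, int offsets[3])
{
	int n = 0;

	if (block < 0)
		return 0;
	if (block < 7) {
		offsets[n++] = block;
	} else if ((block -= 7) < 512) {
		offsets[n++] = 7;
		offsets[n++] = block;
	} else {
		block -= 512;
		offsets[n++] = 8;
		offsets[n++] = block >> 9;
		offsets[n++] = block & 511;
	}
	return n;
}

int main(void)
{
	int off[3] = { 0, 0, 0 };
	int n = v1_block_to_path(600, off);

	printf("depth=%d path={%d,%d,%d}\n", n, off[0], off[1], off[2]);
	return 0;
}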
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM block

#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BLOCK_H

#include <linux/blktrace_api.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
#include <uapi/linux/ioprio.h>

#define RWBS_LEN	9

#define IOPRIO_CLASS_STRINGS \
	{ IOPRIO_CLASS_NONE,	"none" }, \
	{ IOPRIO_CLASS_RT,	"rt" }, \
	{ IOPRIO_CLASS_BE,	"be" }, \
	{ IOPRIO_CLASS_IDLE,	"idle" }, \
	{ IOPRIO_CLASS_INVALID,	"invalid"}

#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,

	TP_PROTO(struct buffer_head *bh),

	TP_ARGS(bh),

	TP_STRUCT__entry (
		__field(  dev_t,	dev			)
		__field(  sector_t,	sector			)
		__field(  size_t,	size			)
	),

	TP_fast_assign(
		__entry->dev		= bh->b_bdev->bd_dev;
		__entry->sector		= bh->b_blocknr;
		__entry->size		= bh->b_size;
	),

	TP_printk("%d,%d sector=%llu size=%zu",
		MAJOR(__entry->dev), MINOR(__entry->dev),
		(unsigned long long)__entry->sector, __entry->size
	)
);

/**
 * block_touch_buffer - mark a buffer accessed
 * @bh: buffer_head being touched
 *
 * Called from touch_buffer().
*/ DEFINE_EVENT(block_buffer, block_touch_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); /** * block_dirty_buffer - mark a buffer dirty * @bh: buffer_head being dirtied * * Called from mark_buffer_dirty(). */ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_PROTO(struct buffer_head *bh), TP_ARGS(bh) ); #endif /* CONFIG_BUFFER_HEAD */ /** * block_rq_requeue - place block IO request back on a queue * @rq: block IO operation request * * The block operation request @rq is being placed back into queue * @q. For some reason the request was not completed and needs to be * put back in the queue. */ TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), 0) ); DECLARE_EVENT_CLASS(block_rq_completion, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int , error ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error) ); /** * block_rq_complete - block IO operation completed by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If * the @rq->bio is %NULL, then there is absolutely no additional work to * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ DEFINE_EVENT(block_rq_completion, block_rq_complete, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); /** * block_rq_error - block IO operation error reported by device driver * @rq: block operations request * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_error tracepoint event indicates that some portion * of operation request has failed as reported by the device driver. 
*/ DEFINE_EVENT(block_rq_completion, block_rq_error, TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), TP_ARGS(rq, error, nr_bytes) ); DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request *rq), TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); __entry->ioprio = req_get_ioprio(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), (unsigned long long)__entry->sector, __entry->nr_sector, __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), IOPRIO_CLASS_STRINGS), IOPRIO_PRIO_HINT(__entry->ioprio), IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm) ); /** * block_rq_insert - insert block operation request into queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted * into queue @q. The fields in the operation request @rq struct can * be examined to determine which device and sectors the pending * operation would access. */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver * @rq: block IO operation request * * Called when block operation request @rq from queue @q is sent to a * device driver for processing. */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator * @rq: block IO operation request * * Called when block operation request @rq from queue @q is merged to another * request queued in the elevator. */ DEFINE_EVENT(block_rq, block_rq_merge, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_start - insert a request for execution * @rq: block IO operation request * * Called when block operation request @rq is queued for execution */ DEFINE_EVENT(block_rq, block_io_start, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_io_done - block IO operation request completed * @rq: block IO operation request * * Called when block operation request @rq is completed */ DEFINE_EVENT(block_rq, block_io_done, TP_PROTO(struct request *rq), TP_ARGS(rq) ); /** * block_bio_complete - completed all work on the block operation * @q: queue holding the block operation * @bio: block operation completed * * This tracepoint indicates there is no further work to do on this * block IO operation @bio. 
*/ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct bio *bio), TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu + %u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->comm) ); /** * block_bio_backmerge - merging block operation to the end of an existing operation * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block request. */ DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ DEFINE_EVENT(block_bio, block_bio_queue, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations * @bio: pending block IO operation (can be %NULL) * * A request struct has been allocated to handle the block IO operation @bio. */ DEFINE_EVENT(block_bio, block_getrq, TP_PROTO(struct bio *bio), TP_ARGS(bio) ); /** * block_plug - keep operations requests in request queue * @q: request queue to plug * * Plug the request queue @q. Do not allow block operation requests * to be sent to the device driver. Instead, accumulate requests in * the queue to improve throughput performance of the block device. 
*/ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), TP_ARGS(q), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s]", __entry->comm) ); DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit), TP_STRUCT__entry( __field( int, nr_rq ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->nr_rq = depth; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); /** * block_unplug - release of operations requests in request queue * @q: request queue to unplug * @depth: number of requests just added to the queue * @explicit: whether this was an explicit unplug, or one from schedule() * * Unplug request queue @q because device driver is scheduled to work * on elements in the request queue. */ DEFINE_EVENT(block_unplug, block_unplug, TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit), TP_ARGS(q, depth, explicit) ); /** * block_split - split a single bio struct into two bio structs * @bio: block operation being split * @new_sector: The starting sector for the new bio * * The bio request @bio needs to be split into two bio requests. The newly * created @bio request starts at @new_sector. This split may be required due to * hardware limitations such as operation crossing device boundaries in a RAID * system. */ TRACE_EVENT(block_split, TP_PROTO(struct bio *bio, unsigned int new_sector), TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), TP_printk("%d,%d %s %llu / %llu [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, (unsigned long long)__entry->new_sector, __entry->comm) ); /** * block_bio_remap - map request for a logical device to the raw device * @bio: revised operation * @dev: original device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the * raw block device. */ TRACE_EVENT(block_bio_remap, TP_PROTO(struct bio *bio, dev_t dev, sector_t from), TP_ARGS(bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; blk_fill_rwbs(__entry->rwbs, bio->bi_opf); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector) ); /** * block_rq_remap - map request for a block operation request * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation * * The block operation request @rq in @q has been remapped. 
The block * operation request @rq holds the current information and @from holds * the original sector. */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request *rq, dev_t dev, sector_t from), TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) __field( unsigned int, nr_bios ) __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( __entry->dev = disk_devt(rq->q->disk); __entry->sector = blk_rq_pos(rq); __entry->nr_sector = blk_rq_sectors(rq); __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, (unsigned long long)__entry->sector, __entry->nr_sector, MAJOR(__entry->old_dev), MINOR(__entry->old_dev), (unsigned long long)__entry->old_sector, __entry->nr_bios) ); #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
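/*
 * Editor's sketch (not part of the original header): how a kernel module
 * might hook one of the tracepoints declared above. The register/unregister
 * helpers are generated per-tracepoint by the DEFINE_EVENT() machinery;
 * the probe function and module names here are hypothetical, and this
 * assumes the tracepoint is exported to modules.
 */
#include <linux/module.h>
#include <trace/events/block.h>

/* Probe signature: private data pointer first, then the TP_PROTO arguments. */
static void sketch_bio_queue_probe(void *ignore, struct bio *bio)
{
	pr_info("bio queued: sector %llu + %u\n",
		(unsigned long long)bio->bi_iter.bi_sector, bio_sectors(bio));
}

static int __init sketch_init(void)
{
	return register_trace_block_bio_queue(sketch_bio_queue_probe, NULL);
}

static void __exit sketch_exit(void)
{
	unregister_trace_block_bio_queue(sketch_bio_queue_probe, NULL);
	tracepoint_synchronize_unregister();	/* wait for in-flight probes */
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");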
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHEFS_COMPRESS_H #define _BCACHEFS_COMPRESS_H #include "extents_types.h" static const unsigned __bch2_compression_opt_to_type[] = { #define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_COMPRESSION_TYPE_##t, BCH_COMPRESSION_OPTS() #undef x }; struct bch_compression_opt { u8 type:4, level:4; }; static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) { return (struct bch_compression_opt) { .type = v & 15, .level = v >> 4, }; } static inline bool bch2_compression_opt_valid(unsigned v) { struct bch_compression_opt opt = __bch2_compression_decode(v); return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); } static inline struct bch_compression_opt bch2_compression_decode(unsigned v) { return bch2_compression_opt_valid(v) ? __bch2_compression_decode(v) : (struct bch_compression_opt) { 0 }; } static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) { return opt.type|(opt.level << 4); } static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } struct bch_write_op; int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, struct bvec_iter, struct bch_extent_crc_unpacked); unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, struct bio *, size_t *, unsigned); int bch2_check_set_has_compressed_data(struct bch_fs *, unsigned); void bch2_fs_compress_exit(struct bch_fs *); int bch2_fs_compress_init(struct bch_fs *); void bch2_compression_opt_to_text(struct printbuf *, u64); int bch2_opt_compression_parse(struct bch_fs *, const char *, u64 *, struct printbuf *); void bch2_opt_compression_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); int bch2_opt_compression_validate(u64, struct printbuf *); #define bch2_opt_compression (struct bch_opt_fn) { \ .parse = bch2_opt_compression_parse, \ .to_text = bch2_opt_compression_to_text, \ .validate = bch2_opt_compression_validate, \ } #endif /* _BCACHEFS_COMPRESS_H */
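/*
 * Editor's sketch (not part of the original header): bch_compression_opt
 * packs the compression type into the low nibble and the level into the
 * high nibble of a single option word, so encode and decode are a simple
 * shift/mask round-trip. Standalone userspace illustration; the type and
 * level values are hypothetical.
 */
#include <assert.h>
#include <stdint.h>

struct opt { uint8_t type, level; };

/* Mirror of bch2_compression_encode(): type in bits 0-3, level in bits 4-7. */
static unsigned opt_encode(struct opt o) { return o.type | (o.level << 4); }

/* Mirror of __bch2_compression_decode(): undo the packing. */
static struct opt opt_decode(unsigned v)
{
	return (struct opt){ .type = v & 15, .level = v >> 4 };
}

int main(void)
{
	struct opt o = { .type = 3, .level = 7 };
	unsigned v = opt_encode(o);	/* 0x73: level nibble above type nibble */

	assert(v == 0x73);
	assert(opt_decode(v).type == 3);
	assert(opt_decode(v).level == 7);
	return 0;
}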
// SPDX-License-Identifier: GPL-2.0 /* * /proc/sys support */ #include <linux/init.h> #include <linux/sysctl.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/printk.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/namei.h> #include <linux/mm.h> #include <linux/uio.h> #include <linux/module.h> #include <linux/bpf-cgroup.h> #include <linux/mount.h> #include <linux/kmemleak.h> #include <linux/lockdep.h> #include "internal.h" #define list_for_each_table_entry(entry, header) \ entry = header->ctl_table; \ for (size_t i = 0 ; i < header->ctl_table_size; ++i, entry++) static const struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; /* * Support for permanently empty directories. * Must be non-empty to avoid sharing an address with other tables. */ static const struct ctl_table sysctl_mount_point[] = { { } }; /** * register_sysctl_mount_point() - registers a sysctl mount point * @path: path for the mount point * * Used to create a permanently empty directory to serve as mount point. * There are some subtle but important permission checks this allows in the * case of unprivileged mounts.
*/ struct ctl_table_header *register_sysctl_mount_point(const char *path) { return register_sysctl_sz(path, sysctl_mount_point, 0); } EXPORT_SYMBOL(register_sysctl_mount_point); #define sysctl_is_perm_empty_ctl_header(hptr) \ (hptr->type == SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY) #define sysctl_set_perm_empty_ctl_header(hptr) \ (hptr->type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY) #define sysctl_clear_perm_empty_ctl_header(hptr) \ (hptr->type = SYSCTL_TABLE_TYPE_DEFAULT) void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) return; atomic_inc(&poll->event); wake_up_interruptible(&poll->wait); } static const struct ctl_table root_table[] = { { .procname = "", .mode = S_IFDIR|S_IRUGO|S_IXUGO, }, }; static struct ctl_table_root sysctl_table_root = { .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table }}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }, }; static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); static int sysctl_follow_link(struct ctl_table_header **phead, const struct ctl_table **pentry); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); static void sysctl_print_dir(struct ctl_dir *dir) { if (dir->header.parent) sysctl_print_dir(dir->header.parent); pr_cont("%s/", dir->header.ctl_table[0].procname); } static int namecmp(const char *name1, int len1, const char *name2, int len2) { int cmp; cmp = memcmp(name1, name2, min(len1, len2)); if (cmp == 0) cmp = len1 - len2; return cmp; } static const struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; const struct ctl_table *entry; struct rb_node *node = dir->root.rb_node; lockdep_assert_held(&sysctl_lock); while (node) { struct ctl_node *ctl_node; const char *procname; int cmp; ctl_node = rb_entry(node, struct ctl_node, node); head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; procname = entry->procname; cmp = namecmp(name, namelen, procname, strlen(procname)); if (cmp < 0) node = node->rb_left; else if (cmp > 0) node = node->rb_right; else { *phead = head; return entry; } } return NULL; } static int insert_entry(struct ctl_table_header *head, const struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; struct rb_node **p = &head->parent->root.rb_node; struct rb_node *parent = NULL; const char *name = entry->procname; int namelen = strlen(name); while (*p) { struct ctl_table_header *parent_head; const struct ctl_table *parent_entry; struct ctl_node *parent_node; const char *parent_name; int cmp; parent = *p; parent_node = rb_entry(parent, struct ctl_node, node); parent_head = parent_node->header; parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; parent_name = parent_entry->procname; cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); if (cmp < 0) p = &(*p)->rb_left; else if (cmp > 0) p = &(*p)->rb_right; else { pr_err("sysctl duplicate entry: "); sysctl_print_dir(head->parent); pr_cont("%s\n", entry->procname); return -EEXIST; } } rb_link_node(node, parent, p); rb_insert_color(node, &head->parent->root); return 0; } static void erase_entry(struct ctl_table_header *head, const struct ctl_table *entry) { struct rb_node *node = &head->node[entry - head->ctl_table].node; rb_erase(node, &head->parent->root); } static void init_header(struct ctl_table_header *head, 
struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_node *node, const struct ctl_table *table, size_t table_size) { head->ctl_table = table; head->ctl_table_size = table_size; head->ctl_table_arg = table; head->used = 0; head->count = 1; head->nreg = 1; head->unregistering = NULL; head->root = root; head->set = set; head->parent = NULL; head->node = node; INIT_HLIST_HEAD(&head->inodes); if (node) { const struct ctl_table *entry; list_for_each_table_entry(entry, head) { node->header = head; node++; } } if (table == sysctl_mount_point) sysctl_set_perm_empty_ctl_header(head); } static void erase_header(struct ctl_table_header *head) { const struct ctl_table *entry; list_for_each_table_entry(entry, head) erase_entry(head, entry); } static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { const struct ctl_table *entry; struct ctl_table_header *dir_h = &dir->header; int err; /* Is this a permanently empty directory? */ if (sysctl_is_perm_empty_ctl_header(dir_h)) return -EROFS; /* Am I creating a permanently empty directory? */ if (sysctl_is_perm_empty_ctl_header(header)) { if (!RB_EMPTY_ROOT(&dir->root)) return -EINVAL; sysctl_set_perm_empty_ctl_header(dir_h); } dir_h->nreg++; header->parent = dir; err = insert_links(header); if (err) goto fail_links; list_for_each_table_entry(entry, header) { err = insert_entry(header, entry); if (err) goto fail; } return 0; fail: erase_header(header); put_links(header); fail_links: if (header->ctl_table == sysctl_mount_point) sysctl_clear_perm_empty_ctl_header(dir_h); header->parent = NULL; drop_sysctl_table(dir_h); return err; } static int use_table(struct ctl_table_header *p) { lockdep_assert_held(&sysctl_lock); if (unlikely(p->unregistering)) return 0; p->used++; return 1; } static void unuse_table(struct ctl_table_header *p) { lockdep_assert_held(&sysctl_lock); if (!--p->used) if (unlikely(p->unregistering)) complete(p->unregistering); } static void proc_sys_invalidate_dcache(struct ctl_table_header *head) { proc_invalidate_siblings_dcache(&head->inodes, &sysctl_lock); } static void start_unregistering(struct ctl_table_header *p) { /* will reacquire if has to wait */ lockdep_assert_held(&sysctl_lock); /* * if p->used is 0, nobody will ever touch that entry again; * we'll eliminate all paths to it before dropping sysctl_lock */ if (unlikely(p->used)) { struct completion wait; init_completion(&wait); p->unregistering = &wait; spin_unlock(&sysctl_lock); wait_for_completion(&wait); } else { /* anything non-NULL; we'll never dereference it */ p->unregistering = ERR_PTR(-EINVAL); spin_unlock(&sysctl_lock); } /* * Invalidate dentries for unregistered sysctls: namespaced sysctls * can have duplicate names and contaminate dcache very badly. */ proc_sys_invalidate_dcache(p); /* * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. 
*/ spin_lock(&sysctl_lock); erase_header(p); } static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) { BUG_ON(!head); spin_lock(&sysctl_lock); if (!use_table(head)) head = ERR_PTR(-ENOENT); spin_unlock(&sysctl_lock); return head; } static void sysctl_head_finish(struct ctl_table_header *head) { if (!head) return; spin_lock(&sysctl_lock); unuse_table(head); spin_unlock(&sysctl_lock); } static struct ctl_table_set * lookup_header_set(struct ctl_table_root *root) { struct ctl_table_set *set = &root->default_set; if (root->lookup) set = root->lookup(root); return set; } static const struct ctl_table *lookup_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; const struct ctl_table *entry; spin_lock(&sysctl_lock); entry = find_entry(&head, dir, name, namelen); if (entry && use_table(head)) *phead = head; else entry = NULL; spin_unlock(&sysctl_lock); return entry; } static struct ctl_node *first_usable_entry(struct rb_node *node) { struct ctl_node *ctl_node; for (;node; node = rb_next(node)) { ctl_node = rb_entry(node, struct ctl_node, node); if (use_table(ctl_node->header)) return ctl_node; } return NULL; } static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, const struct ctl_table **pentry) { struct ctl_table_header *head = NULL; const struct ctl_table *entry = NULL; struct ctl_node *ctl_node; spin_lock(&sysctl_lock); ctl_node = first_usable_entry(rb_first(&dir->root)); spin_unlock(&sysctl_lock); if (ctl_node) { head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; } static void next_entry(struct ctl_table_header **phead, const struct ctl_table **pentry) { struct ctl_table_header *head = *phead; const struct ctl_table *entry = *pentry; struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; spin_lock(&sysctl_lock); unuse_table(head); ctl_node = first_usable_entry(rb_next(&ctl_node->node)); spin_unlock(&sysctl_lock); head = NULL; if (ctl_node) { head = ctl_node->header; entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; } /* * sysctl_perm does NOT grant the superuser all rights automatically, because * some sysctl variables are readonly even to root. 
*/ static int test_perm(int mode, int op) { if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) mode >>= 6; else if (in_egroup_p(GLOBAL_ROOT_GID)) mode >>= 3; if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } static int sysctl_perm(struct ctl_table_header *head, const struct ctl_table *table, int op) { struct ctl_table_root *root = head->root; int mode; if (root->permissions) mode = root->permissions(head, table); else mode = table->mode; return test_perm(mode, op); } static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, const struct ctl_table *table) { struct ctl_table_root *root = head->root; struct inode *inode; struct proc_inode *ei; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); ei = PROC_I(inode); spin_lock(&sysctl_lock); if (unlikely(head->unregistering)) { spin_unlock(&sysctl_lock); iput(inode); return ERR_PTR(-ENOENT); } ei->sysctl = head; ei->sysctl_entry = table; hlist_add_head_rcu(&ei->sibling_inodes, &head->inodes); head->count++; spin_unlock(&sysctl_lock); simple_inode_init_ts(inode); inode->i_mode = table->mode; if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; inode->i_fop = &proc_sys_file_operations; } else { inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; if (sysctl_is_perm_empty_ctl_header(head)) make_empty_dir_inode(inode); } inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; if (root->set_ownership) root->set_ownership(head, &inode->i_uid, &inode->i_gid); return inode; } void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { spin_lock(&sysctl_lock); hlist_del_init_rcu(&PROC_I(inode)->sibling_inodes); if (!--head->count) kfree_rcu(head, rcu); spin_unlock(&sysctl_lock); } static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) head = &sysctl_table_root.default_set.dir.header; return sysctl_head_grab(head); } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; const struct qstr *name = &dentry->d_name; const struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; int ret; if (IS_ERR(head)) return ERR_CAST(head); ctl_dir = container_of(head, struct ctl_dir, header); p = lookup_entry(&h, ctl_dir, name->name, name->len); if (!p) goto out; if (S_ISLNK(p->mode)) { ret = sysctl_follow_link(&h, &p); err = ERR_PTR(ret); if (ret) goto out; } d_set_d_op(dentry, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); err = d_splice_alias(inode, dentry); out: if (h) sysctl_head_finish(h); sysctl_head_finish(head); return err; } static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, int write) { struct inode *inode = file_inode(iocb->ki_filp); struct ctl_table_header *head = grab_header(inode); const struct ctl_table *table = PROC_I(inode)->sysctl_entry; size_t count = iov_iter_count(iter); char *kbuf; ssize_t error; if (IS_ERR(head)) return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered * and won't be until we finish. */ error = -EPERM; if (sysctl_perm(head, table, write ? 
MAY_WRITE : MAY_READ)) goto out; /* if that can happen at all, it should be -EINVAL, not -EISDIR */ error = -EINVAL; if (!table->proc_handler) goto out; /* don't even try if the size is too large */ error = -ENOMEM; if (count >= KMALLOC_MAX_SIZE) goto out; kbuf = kvzalloc(count + 1, GFP_KERNEL); if (!kbuf) goto out; if (write) { error = -EFAULT; if (!copy_from_iter_full(kbuf, count, iter)) goto out_free_buf; kbuf[count] = '\0'; } error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, &iocb->ki_pos); if (error) goto out_free_buf; /* careful: calling conventions are nasty here */ error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); if (error) goto out_free_buf; if (!write) { error = -EFAULT; if (copy_to_iter(kbuf, count, iter) < count) goto out_free_buf; } error = count; out_free_buf: kvfree(kbuf); out: sysctl_head_finish(head); return error; } static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter) { return proc_sys_call_handler(iocb, iter, 0); } static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter) { return proc_sys_call_handler(iocb, iter, 1); } static int proc_sys_open(struct inode *inode, struct file *filp) { struct ctl_table_header *head = grab_header(inode); const struct ctl_table *table = PROC_I(inode)->sysctl_entry; /* sysctl was unregistered */ if (IS_ERR(head)) return PTR_ERR(head); if (table->poll) filp->private_data = proc_sys_poll_event(table->poll); sysctl_head_finish(head); return 0; } static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); const struct ctl_table *table = PROC_I(inode)->sysctl_entry; __poll_t ret = DEFAULT_POLLMASK; unsigned long event; /* sysctl was unregistered */ if (IS_ERR(head)) return EPOLLERR | EPOLLHUP; if (!table->proc_handler) goto out; if (!table->poll) goto out; event = (unsigned long)filp->private_data; poll_wait(filp, &table->poll->wait, wait); if (event != atomic_read(&table->poll->event)) { filp->private_data = proc_sys_poll_event(table->poll); ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } out: sysctl_head_finish(head); return ret; } static bool proc_sys_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, const struct ctl_table *table) { struct dentry *child, *dir = file->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(dir, qname.name, qname.len); child = d_lookup(dir, &qname); if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) return false; if (d_in_lookup(child)) { struct dentry *res; d_set_d_op(child, &proc_sys_dentry_operations); inode = proc_sys_make_inode(dir->d_sb, head, table); res = d_splice_alias(inode, child); d_lookup_done(child); if (unlikely(res)) { dput(child); if (IS_ERR(res)) return false; child = res; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); return dir_emit(ctx, qname.name, qname.len, ino, type); } static bool proc_sys_link_fill_cache(struct file *file, struct dir_context *ctx, struct ctl_table_header *head, const struct ctl_table *table) { bool ret = true; head = sysctl_head_grab(head); if (IS_ERR(head)) return false; /* It is not an error if we cannot follow the link; just ignore it. */ if (sysctl_follow_link(&head, &table)) goto out; ret = proc_sys_fill_cache(file,
ctx, head, table); out: sysctl_head_finish(head); return ret; } static int scan(struct ctl_table_header *head, const struct ctl_table *table, unsigned long *pos, struct file *file, struct dir_context *ctx) { bool res; if ((*pos)++ < ctx->pos) return true; if (unlikely(S_ISLNK(table->mode))) res = proc_sys_link_fill_cache(file, ctx, head, table); else res = proc_sys_fill_cache(file, ctx, head, table); if (res) ctx->pos = *pos; return res; } static int proc_sys_readdir(struct file *file, struct dir_context *ctx) { struct ctl_table_header *head = grab_header(file_inode(file)); struct ctl_table_header *h = NULL; const struct ctl_table *entry; struct ctl_dir *ctl_dir; unsigned long pos; if (IS_ERR(head)) return PTR_ERR(head); ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) goto out; pos = 2; for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { if (!scan(h, entry, &pos, file, ctx)) { sysctl_head_finish(h); break; } } out: sysctl_head_finish(head); return 0; } static int proc_sys_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { /* * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ struct ctl_table_header *head; const struct ctl_table *table; int error; /* Executable files are not allowed under /proc/sys/ */ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) return -EACCES; head = grab_header(inode); if (IS_ERR(head)) return PTR_ERR(head); table = PROC_I(inode)->sysctl_entry; if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? -EACCES : 0; else /* Use the permissions on the sysctl table entry */ error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); sysctl_head_finish(head); return error; } static int proc_sys_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) return -EPERM; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); return 0; } static int proc_sys_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); const struct ctl_table *table = PROC_I(inode)->sysctl_entry; if (IS_ERR(head)) return PTR_ERR(head); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (table) stat->mode = (stat->mode & S_IFMT) | table->mode; sysctl_head_finish(head); return 0; } static const struct file_operations proc_sys_file_operations = { .open = proc_sys_open, .poll = proc_sys_poll, .read_iter = proc_sys_read, .write_iter = proc_sys_write, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, .llseek = default_llseek, }; static const struct file_operations proc_sys_dir_file_operations = { .read = generic_read_dir, .iterate_shared = proc_sys_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations proc_sys_inode_operations = { .permission = proc_sys_permission, .setattr = proc_sys_setattr, .getattr = proc_sys_getattr, }; static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; return 
!PROC_I(d_inode(dentry))->sysctl->unregistering; } static int proc_sys_delete(const struct dentry *dentry) { return !!PROC_I(d_inode(dentry))->sysctl->unregistering; } static int sysctl_is_seen(struct ctl_table_header *p) { struct ctl_table_set *set = p->set; int res; spin_lock(&sysctl_lock); if (p->unregistering) res = 0; else if (!set->is_seen) res = 1; else res = set->is_seen(set); spin_unlock(&sysctl_lock); return res; } static int proc_sys_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct ctl_table_header *head; struct inode *inode; /* Although proc doesn't have negative dentries, rcu-walk means * that inode here can be NULL */ /* AV: can it, indeed? */ inode = d_inode_rcu(dentry); if (!inode) return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; head = rcu_dereference(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } static const struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, .d_delete = proc_sys_delete, .d_compare = proc_sys_compare, }; static struct ctl_dir *find_subdir(struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; const struct ctl_table *entry; entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (!S_ISDIR(entry->mode)) return ERR_PTR(-ENOTDIR); return container_of(head, struct ctl_dir, header); } static struct ctl_dir *new_dir(struct ctl_table_set *set, const char *name, int namelen) { struct ctl_table *table; struct ctl_dir *new; struct ctl_node *node; char *new_name; new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + sizeof(struct ctl_table) + namelen + 1, GFP_KERNEL); if (!new) return NULL; node = (struct ctl_node *)(new + 1); table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 1); memcpy(new_name, name, namelen); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, node, table, 1); return new; } /** * get_subdir - find or create a subdir with the specified name. * @dir: Directory to create the subdirectory in * @name: The name of the subdirectory to find or create * @namelen: The length of name * * Takes a directory with an elevated reference count so we know that * if we drop the lock the directory will not go away. Upon success * the reference is moved from @dir to the returned subdirectory. * Upon error an error code is returned and the reference on @dir is * simply dropped. */ static struct ctl_dir *get_subdir(struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; int err; spin_lock(&sysctl_lock); subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; spin_unlock(&sysctl_lock); new = new_dir(set, name, namelen); spin_lock(&sysctl_lock); subdir = ERR_PTR(-ENOMEM); if (!new) goto failed; /* Was the subdir added while we dropped the lock? */ subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; /* Nope. Use our freshly made directory entry.
*/ err = insert_header(dir, &new->header); subdir = ERR_PTR(err); if (err) goto failed; subdir = new; found: subdir->header.nreg++; failed: if (IS_ERR(subdir)) { pr_err("sysctl could not get directory: "); sysctl_print_dir(dir); pr_cont("%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); if (new) drop_sysctl_table(&new->header); spin_unlock(&sysctl_lock); return subdir; } static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) { struct ctl_dir *parent; const char *procname; if (!dir->header.parent) return &set->dir; parent = xlate_dir(set, dir->header.parent); if (IS_ERR(parent)) return parent; procname = dir->header.ctl_table[0].procname; return find_subdir(parent, procname, strlen(procname)); } static int sysctl_follow_link(struct ctl_table_header **phead, const struct ctl_table **pentry) { struct ctl_table_header *head; const struct ctl_table *entry; struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *dir; int ret; spin_lock(&sysctl_lock); root = (*pentry)->data; set = lookup_header_set(root); dir = xlate_dir(set, (*phead)->parent); if (IS_ERR(dir)) ret = PTR_ERR(dir); else { const char *procname = (*pentry)->procname; head = NULL; entry = find_entry(&head, dir, procname, strlen(procname)); ret = -ENOENT; if (entry && use_table(head)) { unuse_table(*phead); *phead = head; *pentry = entry; ret = 0; } } spin_unlock(&sysctl_lock); return ret; } static int sysctl_err(const char *path, const struct ctl_table *table, char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; pr_err("sysctl table check failed: %s/%s %pV\n", path, table->procname, &vaf); va_end(args); return -EINVAL; } static int sysctl_check_table_array(const char *path, const struct ctl_table *table) { unsigned int extra; int err = 0; if ((table->proc_handler == proc_douintvec) || (table->proc_handler == proc_douintvec_minmax)) { if (table->maxlen != sizeof(unsigned int)) err |= sysctl_err(path, table, "array not allowed"); } if (table->proc_handler == proc_dou8vec_minmax) { if (table->maxlen != sizeof(u8)) err |= sysctl_err(path, table, "array not allowed"); if (table->extra1) { extra = *(unsigned int *) table->extra1; if (extra > 255U) err |= sysctl_err(path, table, "range value too large for proc_dou8vec_minmax"); } if (table->extra2) { extra = *(unsigned int *) table->extra2; if (extra > 255U) err |= sysctl_err(path, table, "range value too large for proc_dou8vec_minmax"); } } if (table->proc_handler == proc_dobool) { if (table->maxlen != sizeof(bool)) err |= sysctl_err(path, table, "array not allowed"); } return err; } static int sysctl_check_table(const char *path, struct ctl_table_header *header) { const struct ctl_table *entry; int err = 0; list_for_each_table_entry(entry, header) { if (!entry->procname) err |= sysctl_err(path, entry, "procname is null"); if ((entry->proc_handler == proc_dostring) || (entry->proc_handler == proc_dobool) || (entry->proc_handler == proc_dointvec) || (entry->proc_handler == proc_douintvec) || (entry->proc_handler == proc_douintvec_minmax) || (entry->proc_handler == proc_dointvec_minmax) || (entry->proc_handler == proc_dou8vec_minmax) || (entry->proc_handler == proc_dointvec_jiffies) || (entry->proc_handler == proc_dointvec_userhz_jiffies) || (entry->proc_handler == proc_dointvec_ms_jiffies) || (entry->proc_handler == proc_doulongvec_minmax) || (entry->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { if (!entry->data) err |= sysctl_err(path, entry, "No 
data"); if (!entry->maxlen) err |= sysctl_err(path, entry, "No maxlen"); else err |= sysctl_check_table_array(path, entry); } if (!entry->proc_handler) err |= sysctl_err(path, entry, "No proc_handler"); if ((entry->mode & (S_IRUGO|S_IWUGO)) != entry->mode) err |= sysctl_err(path, entry, "bogus .mode 0%o", entry->mode); } return err; } static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table_header *head) { struct ctl_table *link_table, *link; struct ctl_table_header *links; const struct ctl_table *entry; struct ctl_node *node; char *link_name; int name_bytes; name_bytes = 0; list_for_each_table_entry(entry, head) { name_bytes += strlen(entry->procname) + 1; } links = kzalloc(sizeof(struct ctl_table_header) + sizeof(struct ctl_node)*head->ctl_table_size + sizeof(struct ctl_table)*head->ctl_table_size + name_bytes, GFP_KERNEL); if (!links) return NULL; node = (struct ctl_node *)(links + 1); link_table = (struct ctl_table *)(node + head->ctl_table_size); link_name = (char *)(link_table + head->ctl_table_size); link = link_table; list_for_each_table_entry(entry, head) { int len = strlen(entry->procname) + 1; memcpy(link_name, entry->procname, len); link->procname = link_name; link->mode = S_IFLNK|S_IRWXUGO; link->data = head->root; link_name += len; link++; } init_header(links, dir->header.root, dir->header.set, node, link_table, head->ctl_table_size); links->nreg = head->ctl_table_size; return links; } static bool get_links(struct ctl_dir *dir, struct ctl_table_header *header, struct ctl_table_root *link_root) { struct ctl_table_header *tmp_head; const struct ctl_table *entry, *link; if (header->ctl_table_size == 0 || sysctl_is_perm_empty_ctl_header(header)) return true; /* Are there links available for every entry in table? */ list_for_each_table_entry(entry, header) { const char *procname = entry->procname; link = find_entry(&tmp_head, dir, procname, strlen(procname)); if (!link) return false; if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) continue; if (S_ISLNK(link->mode) && (link->data == link_root)) continue; return false; } /* The checks passed. Increase the registration count on the links */ list_for_each_table_entry(entry, header) { const char *procname = entry->procname; link = find_entry(&tmp_head, dir, procname, strlen(procname)); tmp_head->nreg++; } return true; } static int insert_links(struct ctl_table_header *head) { struct ctl_table_set *root_set = &sysctl_table_root.default_set; struct ctl_dir *core_parent; struct ctl_table_header *links; int err; if (head->set == root_set) return 0; core_parent = xlate_dir(root_set, head->parent); if (IS_ERR(core_parent)) return 0; if (get_links(core_parent, head, head->root)) return 0; core_parent->header.nreg++; spin_unlock(&sysctl_lock); links = new_links(core_parent, head); spin_lock(&sysctl_lock); err = -ENOMEM; if (!links) goto out; err = 0; if (get_links(core_parent, head, head->root)) { kfree(links); goto out; } err = insert_header(core_parent, links); if (err) kfree(links); out: drop_sysctl_table(&core_parent->header); return err; } /* Find the directory for the ctl_table. If one is not found create it. */ static struct ctl_dir *sysctl_mkdir_p(struct ctl_dir *dir, const char *path) { const char *name, *nextname; for (name = path; name; name = nextname) { int namelen; nextname = strchr(name, '/'); if (nextname) { namelen = nextname - name; nextname++; } else { namelen = strlen(name); } if (namelen == 0) continue; /* * namelen ensures if name is "foo/bar/yay" only foo is * registered first. 
We traverse as if using mkdir -p and * return a ctl_dir for the last directory entry. */ dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) break; } return dir; } /** * __register_sysctl_table - register a leaf sysctl table * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * * @table: the top-level table structure. This table must not be freed * after registration, so it must not live on the stack. It can either * be a global or dynamically allocated by the caller and freed later * after sysctl unregistration. * @table_size: The number of elements in table * * Register a sysctl table hierarchy. @table should be a filled-in ctl_table * array. * * The members of the &struct ctl_table structure are used as follows: * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not * enter a sysctl file * data - a pointer to data for use by proc_handler * maxlen - the maximum size in bytes of the data * mode - the file permissions for the /proc/sys file * type - Defines the target type (described in struct definition) * proc_handler - the text handler routine (described below) * * extra1, extra2 - extra pointers usable by the proc handler routines * XXX: we should eventually modify these to use long min / max [0] * [0] https://lkml.kernel.org/87zgpte9o4.fsf@email.froward.int.ebiederm.org * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes are not allowed. * * There must be a proc_handler routine for any terminal nodes. * Several default handlers are available to cover common cases - * * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() * * It is the handler's job to read the input buffer from user memory * and process it. The handler should return 0 on success. * * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, const struct ctl_table *table, size_t table_size) { struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *header; struct ctl_dir *dir; struct ctl_node *node; header = kzalloc(sizeof(struct ctl_table_header) + sizeof(struct ctl_node)*table_size, GFP_KERNEL_ACCOUNT); if (!header) return NULL; node = (struct ctl_node *)(header + 1); init_header(header, root, set, node, table, table_size); if (sysctl_check_table(path, header)) goto fail; spin_lock(&sysctl_lock); dir = &set->dir; /* Reference is moved down the directory tree by get_subdir() */ dir->header.nreg++; spin_unlock(&sysctl_lock); dir = sysctl_mkdir_p(dir, path); if (IS_ERR(dir)) goto fail; spin_lock(&sysctl_lock); if (insert_header(dir, header)) goto fail_put_dir_locked; drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: kfree(header); return NULL; } /** * register_sysctl_sz - register a sysctl table * @path: The path to the directory the sysctl table is in. If the path * doesn't exist we will create it for you. * @table: the table structure. The caller must ensure that @table stays * alive for as long as the sysctl is registered. It must not be freed * until unregister_sysctl_table() is called with the table header returned * by this registration.
If your code is non-modular then you don't need * to call unregister_sysctl_table() and can instead use something like * register_sysctl_init(), which does not care about the result of the sysctl * registration. * @table_size: The number of elements in table. * * Register a sysctl table. @table should be a filled-in ctl_table * array. A completely zero-filled entry terminates the table. * * See __register_sysctl_table for more details. */ struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table, size_t table_size) { return __register_sysctl_table(&sysctl_table_root.default_set, path, table, table_size); } EXPORT_SYMBOL(register_sysctl_sz); /** * __register_sysctl_init() - register sysctl table to path * @path: path name for sysctl base. If that path doesn't exist we will create * it for you. * @table: This is the sysctl table that needs to be registered to the path. * The caller must ensure that @table stays alive for the * lifetime of the sysctl. * @table_name: The name of sysctl table, only used for log printing when * registration fails * @table_size: The number of elements in table * * The sysctl interface is used by userspace to query or modify at runtime * a predefined value set on a variable. These variables however have default * values pre-set. Code which depends on these variables will always work even * if register_sysctl() fails. If register_sysctl() fails you'd just lose the * ability to query or modify the sysctls dynamically at run time. Chances of * register_sysctl() failing on init are extremely low, and so for both reasons * this function does not return any error as it is used by initialization code. * * Context: if your base directory does not exist it will be created for you. */ void __init __register_sysctl_init(const char *path, const struct ctl_table *table, const char *table_name, size_t table_size) { struct ctl_table_header *hdr = register_sysctl_sz(path, table, table_size); if (unlikely(!hdr)) { pr_err("failed when register_sysctl_sz %s to %s\n", table_name, path); return; } kmemleak_not_leak(hdr); } static void put_links(struct ctl_table_header *header) { struct ctl_table_set *root_set = &sysctl_table_root.default_set; struct ctl_table_root *root = header->root; struct ctl_dir *parent = header->parent; struct ctl_dir *core_parent; const struct ctl_table *entry; if (header->set == root_set) return; core_parent = xlate_dir(root_set, parent); if (IS_ERR(core_parent)) return; list_for_each_table_entry(entry, header) { struct ctl_table_header *link_head; const struct ctl_table *link; const char *name = entry->procname; link = find_entry(&link_head, core_parent, name, strlen(name)); if (link && ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || (S_ISLNK(link->mode) && (link->data == root)))) { drop_sysctl_table(link_head); } else { pr_err("sysctl link missing during unregister: "); sysctl_print_dir(parent); pr_cont("%s\n", name); } } } static void drop_sysctl_table(struct ctl_table_header *header) { struct ctl_dir *parent = header->parent; if (--header->nreg) return; if (parent) { put_links(header); start_unregistering(header); } if (!--header->count) kfree_rcu(header, rcu); if (parent) drop_sysctl_table(&parent->header); } /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl or __register_sysctl_table * * Unregisters the sysctl table and all children. proc entries may not * actually be removed until they are no longer used by anyone.
 */
void unregister_sysctl_table(struct ctl_table_header *header)
{
	might_sleep();

	if (header == NULL)
		return;

	spin_lock(&sysctl_lock);
	drop_sysctl_table(header);
	spin_unlock(&sysctl_lock);
}
EXPORT_SYMBOL(unregister_sysctl_table);

void setup_sysctl_set(struct ctl_table_set *set,
	struct ctl_table_root *root,
	int (*is_seen)(struct ctl_table_set *))
{
	memset(set, 0, sizeof(*set));
	set->is_seen = is_seen;
	init_header(&set->dir.header, root, set, NULL, root_table, 1);
}

void retire_sysctl_set(struct ctl_table_set *set)
{
	WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
}

int __init proc_sys_init(void)
{
	struct proc_dir_entry *proc_sys_root;

	proc_sys_root = proc_mkdir("sys", NULL);
	proc_sys_root->proc_iops = &proc_sys_dir_operations;
	proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
	proc_sys_root->nlink = 0;

	return sysctl_init_bases();
}

struct sysctl_alias {
	const char *kernel_param;
	const char *sysctl_param;
};

/*
 * Historically some settings had both sysctl and a command line parameter.
 * With the generic sysctl. parameter support, we can handle them in a single
 * place and only keep the historical name for compatibility. This is not meant
 * to add brand new aliases. When adding existing aliases, consider whether
 * the possibly different moment of changing the value (e.g. from early_param
 * to the moment do_sysctl_args() is called) is an issue for the specific
 * parameter.
 */
static const struct sysctl_alias sysctl_aliases[] = {
	{"hardlockup_all_cpu_backtrace",	"kernel.hardlockup_all_cpu_backtrace" },
	{"hung_task_panic",			"kernel.hung_task_panic" },
	{"numa_zonelist_order",			"vm.numa_zonelist_order" },
	{"softlockup_all_cpu_backtrace",	"kernel.softlockup_all_cpu_backtrace" },
	{ }
};

static const char *sysctl_find_alias(char *param)
{
	const struct sysctl_alias *alias;

	for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) {
		if (strcmp(alias->kernel_param, param) == 0)
			return alias->sysctl_param;
	}

	return NULL;
}

bool sysctl_is_alias(char *param)
{
	const char *alias = sysctl_find_alias(param);

	return alias != NULL;
}
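/*
 * Usage sketch for the alias table above (illustrative; whether a given
 * sysctl exists depends on the kernel configuration): with the entries in
 * sysctl_aliases[], both of the following kernel command lines end up
 * writing "1" to /proc/sys/kernel/hung_task_panic once do_sysctl_args()
 * runs:
 *
 *	hung_task_panic=1
 *	sysctl.kernel.hung_task_panic=1
 *
 * The first form works only because of its sysctl_aliases[] entry; the
 * "sysctl." prefix form works for any registered sysctl, with '.' or '/'
 * accepted as the separator (process_sysctl_arg() below rewrites '.' to
 * '/' before looking the file up under a temporary proc mount).
 */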
/* Set sysctl value passed on kernel command line. */
static int process_sysctl_arg(char *param, char *val,
			       const char *unused, void *arg)
{
	char *path;
	struct vfsmount **proc_mnt = arg;
	struct file_system_type *proc_fs_type;
	struct file *file;
	int len;
	int err;
	loff_t pos = 0;
	ssize_t wret;

	if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
		param += sizeof("sysctl") - 1;

		if (param[0] != '/' && param[0] != '.')
			return 0;

		param++;
	} else {
		param = (char *) sysctl_find_alias(param);
		if (!param)
			return 0;
	}

	if (!val)
		return -EINVAL;
	len = strlen(val);
	if (len == 0)
		return -EINVAL;

	/*
	 * To set sysctl options, we use a temporary mount of proc, look up the
	 * respective sys/ file and write to it. To avoid mounting it when no
	 * options were given, we mount it only when the first sysctl option is
	 * found. Why not a persistent mount? There are problems with a
	 * persistent mount of proc in that it forces userspace not to use any
	 * proc mount options.
	 */
	if (!*proc_mnt) {
		proc_fs_type = get_fs_type("proc");
		if (!proc_fs_type) {
			pr_err("Failed to find procfs to set sysctl from command line\n");
			return 0;
		}
		*proc_mnt = kern_mount(proc_fs_type);
		put_filesystem(proc_fs_type);
		if (IS_ERR(*proc_mnt)) {
			pr_err("Failed to mount procfs to set sysctl from command line\n");
			return 0;
		}
	}

	path = kasprintf(GFP_KERNEL, "sys/%s", param);
	if (!path)
		panic("%s: Failed to allocate path for %s\n", __func__, param);
	strreplace(path, '.', '/');

	file = file_open_root_mnt(*proc_mnt, path, O_WRONLY, 0);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		if (err == -ENOENT)
			pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n",
				param, val);
		else if (err == -EACCES)
			pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n",
				param, val);
		else
			pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n",
				file, param, val);
		goto out;
	}
	wret = kernel_write(file, val, len, &pos);
	if (wret < 0) {
		err = wret;
		if (err == -EINVAL)
			pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n",
				param, val);
		else
			pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n",
				ERR_PTR(err), param, val);
	} else if (wret != len) {
		pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s'\n",
			wret, len, path, param, val);
	}

	err = filp_close(file, NULL);
	if (err)
		pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s'\n",
			ERR_PTR(err), param, val);
out:
	kfree(path);
	return 0;
}

void do_sysctl_args(void)
{
	char *command_line;
	struct vfsmount *proc_mnt = NULL;

	command_line = kstrdup(saved_command_line, GFP_KERNEL);
	if (!command_line)
		panic("%s: Failed to allocate copy of command line\n", __func__);

	parse_args("Setting sysctl args", command_line,
		   NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg);

	if (proc_mnt)
		kern_unmount(proc_mnt);

	kfree(command_line);
}
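/*
 * Example: a minimal, hypothetical user of the registration API above (the
 * names and the "fs/example" path are made up for illustration; this is a
 * sketch, not kernel code). It publishes one integer as
 * /proc/sys/fs/example/depth, clamped to [0, 10] by proc_dointvec_minmax().
 */
#ifdef EXAMPLE_SYSCTL_SKETCH
static int example_depth;
static int example_depth_min;		/* 0 */
static int example_depth_max = 10;

static const struct ctl_table example_table[] = {
	{
		.procname	= "depth",
		.data		= &example_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &example_depth_min,
		.extra2		= &example_depth_max,
	},
};

static void __init example_sysctl_init(void)
{
	/*
	 * register_sysctl_init() suits boot-time tables that are never
	 * unregistered; modular code would instead keep the header from
	 * register_sysctl_sz() and pass it to unregister_sysctl_table()
	 * on exit.
	 */
	register_sysctl_init("fs/example", example_table);
}
#endif /* EXAMPLE_SYSCTL_SKETCH */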
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

#include <linux/vfsdebug.h>
#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
#include <linux/dcache.h>
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/list_lru.h>
#include <linux/llist.h>
#include <linux/radix-tree.h>
#include <linux/xarray.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/bug.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/mm_types.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fcntl.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
#include <linux/shrinker.h>
#include <linux/migrate_mode.h>
#include <linux/uidgid.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
#include <linux/delayed_call.h>
#include <linux/uuid.h>
#include <linux/errseq.h>
#include <linux/ioprio.h>
#include <linux/fs_types.h>
#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <linux/mount.h>
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
#include <linux/maple_tree.h>
#include <linux/rw_hint.h>
#include <linux/file_ref.h>
#include <linux/unicode.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>

struct backing_dev_info;
struct bdi_writeback;
struct bio;
struct io_comp_batch;
struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
struct kiocb;
struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
struct fsnotify_mark_connector;
struct
fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; struct fileattr; struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; typedef __kernel_rwf_t rwf_t; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 #define MAY_READ 0x00000004 #define MAY_APPEND 0x00000008 #define MAY_ACCESS 0x00000010 #define MAY_OPEN 0x00000020 #define MAY_CHDIR 0x00000040 /* called from RCU mode, don't block */ #define MAY_NOT_BLOCK 0x00000080 /* * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open() */ /* file is open for reading */ #define FMODE_READ ((__force fmode_t)(1 << 0)) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)(1 << 1)) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)(1 << 2)) /* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)(1 << 3)) /* file can be accessed using pwrite */ #define FMODE_PWRITE ((__force fmode_t)(1 << 4)) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)(1 << 5)) /* File writes are restricted (block device specific) */ #define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) /* File supports atomic writes */ #define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7)) /* FMODE_* bit 8 */ /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) /* 64bit hashes as llseek() offset (for directories) */ #define FMODE_64BITHASH ((__force fmode_t)(1 << 10)) /* * Don't update ctime and mtime. * * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. 
*/ #define FMODE_NOCMTIME ((__force fmode_t)(1 << 11)) /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) /* FMODE_* bit 13 */ /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15)) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)(1 << 16)) /* Has read method(s) */ #define FMODE_CAN_READ ((__force fmode_t)(1 << 17)) /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18)) #define FMODE_OPENED ((__force fmode_t)(1 << 19)) #define FMODE_CREATED ((__force fmode_t)(1 << 20)) /* File is stream-like */ #define FMODE_STREAM ((__force fmode_t)(1 << 21)) /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22)) #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) /* File is embedded in backing_file object */ #define FMODE_BACKING ((__force fmode_t)(1 << 24)) /* * Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be * generated (see below) */ #define FMODE_NONOTIFY ((__force fmode_t)(1 << 25)) /* * Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be * generated (see below) */ #define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) /* File represents mount that needs unmounting */ #define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28)) /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* * The two FMODE_NONOTIFY* define which fsnotify events should not be generated * for a file. These are the possible values of (f->f_mode & * FMODE_FSNOTIFY_MASK) and their meaning: * * FMODE_NONOTIFY - suppress all (incl. non-permission) events. * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events. */ #define FMODE_FSNOTIFY_MASK \ (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM) #define FMODE_FSNOTIFY_NONE(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS #define FMODE_FSNOTIFY_PERM(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == 0 || \ (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)) #define FMODE_FSNOTIFY_HSM(mode) \ ((mode & FMODE_FSNOTIFY_MASK) == 0) #else #define FMODE_FSNOTIFY_PERM(mode) 0 #define FMODE_FSNOTIFY_HSM(mode) 0 #endif /* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ #define ATTR_MODE (1 << 0) #define ATTR_UID (1 << 1) #define ATTR_GID (1 << 2) #define ATTR_SIZE (1 << 3) #define ATTR_ATIME (1 << 4) #define ATTR_MTIME (1 << 5) #define ATTR_CTIME (1 << 6) #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) #define ATTR_DELEG (1 << 18) /* Delegated attrs. Don't break write delegations */ /* * Whiteout is represented by a char device. The following constants define the * mode and device number to use. */ #define WHITEOUT_MODE 0 #define WHITEOUT_DEV 0 /* * This is the Inode Attributes structure, used for notify_change(). 
It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares * about. Basically, these are the attributes that the VFS layer can * request to change from the FS layer. * * Derek Atkins <warlord@MIT.EDU> 94-10-20 */ struct iattr { unsigned int ia_valid; umode_t ia_mode; /* * The two anonymous unions wrap structures with the same member. * * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which * are a dedicated type requiring the filesystem to use the dedicated * helpers. Other filesystem can continue to use ia_{g,u}id until they * have been ported. * * They always contain the same value. In other words FS_ALLOW_IDMAP * pass down the same value on idmapped mounts as they would on regular * mounts. */ union { kuid_t ia_uid; vfsuid_t ia_vfsuid; }; union { kgid_t ia_gid; vfsgid_t ia_vfsgid; }; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; /* * Not an attribute, but an auxiliary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). */ struct file *ia_file; }; /* * Includes for diskquotas. */ #include <linux/quota.h> /* * Maximum number of layers of fs stack. Needs to be limited to * prevent kernel stack overflow */ #define FILESYSTEM_MAX_STACK_DEPTH 2 /** * enum positive_aop_returns - aop return codes with specific semantics * * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has * completed, that the page is still locked, and * should be considered active. The VM uses this hint * to return the page to the active list -- it won't * be a candidate for writeback again in the near * future. Other callers must be careful to unlock * the page if they get this return. Returned by * writepage(); * * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has * unlocked it and the page might have been truncated. * The caller should back up to acquiring a new page and * trying again. The aop will be taking reasonable * precautions not to livelock. If the caller held a page * reference, it should drop it before retrying. Returned * by read_folio(). * * address_space_operation functions return these large constants to indicate * special semantics to the caller. These are much larger than the bytes in a * page to allow for functions that return the number of bytes operated on in a * given page. */ enum positive_aop_returns { AOP_WRITEPAGE_ACTIVATE = 0x80000, AOP_TRUNCATED_PAGE = 0x80001, }; /* * oh the beauties of C type declarations. 
*/ struct page; struct address_space; struct writeback_control; struct readahead_control; /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC #define IOCB_DONTCACHE (__force int) RWF_DONTCACHE /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) #define IOCB_DIRECT (1 << 17) #define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) /* * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the * iocb completion can be passed back to the owner for execution from a safe * context rather than needing to be punted through a workqueue. If this * flag is set, the bio completion handling may set iocb->dio_complete to a * handler function and iocb->private to context information for that handler. * The issuer should call the handler with that context information from task * context to complete the processing of the iocb. Note that while this * provides a task context for the dio_complete() callback, it should only be * used on the completion side for non-IO generating completions. It's fine to * call blocking functions from this callback, but they should not wait for * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. */ #define IOCB_AIO_RW (1 << 23) #define IOCB_HAS_METADATA (1 << 24) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ { IOCB_HIPRI, "HIPRI" }, \ { IOCB_DSYNC, "DSYNC" }, \ { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ { IOCB_ATOMIC, "ATOMIC" }, \ { IOCB_DONTCACHE, "DONTCACHE" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ { IOCB_AIO_RW, "AIO_RW" }, \ { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; loff_t ki_pos; void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the * page waitqueue associated with completing the read. Valid * IFF IOCB_WAITQ is set. */ struct wait_page_queue *ki_waitq; /* * Can be used for O_DIRECT IO, where the completion handling * is punted back to the issuer of the IO. May only be set * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer * must then check for presence of this handler when ki_complete * is invoked. The data passed in to this handler must be * assigned to ->private when dio_complete is assigned. */ ssize_t (*dio_complete)(void *data); }; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { return kiocb->ki_complete == NULL; } struct address_space_operations { int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); /* Mark a folio dirty. 
Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t offset, size_t len); bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb); int (*error_remove_folio)(struct address_space *, struct folio *); /* swapfile support */ int (*swap_activate)(struct swap_info_struct *sis, struct file *file, sector_t *span); void (*swap_deactivate)(struct file *file); int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter); }; extern const struct address_space_operations empty_aops; /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. * @i_pages: Cached pages. * @invalidate_lock: Guards coherency between page cache contents and * file offset->disk block mappings in the filesystem during invalidates. * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. * @writeback_index: Writeback starts here. * @a_ops: Methods. * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. * @i_private_lock: For use by the owner of the address_space. * @i_private_list: For use by the owner of the address_space. * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; struct xarray i_pages; struct rw_semaphore invalidate_lock; gfp_t gfp_mask; atomic_t i_mmap_writable; #ifdef CONFIG_READ_ONLY_THP_FOR_FS /* number of thp, only for non-shmem files */ atomic_t nr_thps; #endif struct rb_root_cached i_mmap; unsigned long nrpages; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ /* XArray tags, for tagging dirty and writeback pages in the pagecache. 
*/ #define PAGECACHE_TAG_DIRTY XA_MARK_0 #define PAGECACHE_TAG_WRITEBACK XA_MARK_1 #define PAGECACHE_TAG_TOWRITE XA_MARK_2 /* * Returns true if any of the pages in the mapping are marked with the tag. */ static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } static inline void i_mmap_lock_write(struct address_space *mapping) { down_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_write(struct address_space *mapping) { return down_write_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_write(struct address_space *mapping) { up_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_read(struct address_space *mapping) { return down_read_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_lock_read(struct address_space *mapping) { down_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_read(struct address_space *mapping) { up_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_locked(struct address_space *mapping) { lockdep_assert_held(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_write_locked(struct address_space *mapping) { lockdep_assert_held_write(&mapping->i_mmap_rwsem); } /* * Might pages of this file be mapped into userspace? */ static inline int mapping_mapped(struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* * Might pages of this file have been modified in userspace? * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. */ static inline int mapping_writably_mapped(struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } static inline int mapping_map_writable(struct address_space *mapping) { return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? 0 : -EPERM; } static inline void mapping_unmap_writable(struct address_space *mapping) { atomic_dec(&mapping->i_mmap_writable); } static inline int mapping_deny_writable(struct address_space *mapping) { return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? 0 : -EBUSY; } static inline void mapping_allow_writable(struct address_space *mapping) { atomic_inc(&mapping->i_mmap_writable); } /* * Use sequence counter to get consistent i_size on 32-bit processors. */ #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include <linux/seqlock.h> #define __NEED_I_SIZE_ORDERED #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) #else #define i_size_ordered_init(inode) do { } while (0) #endif struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) /* * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to * cache the ACL. This also means that ->get_inode_acl() can be called in RCU * mode with the LOOKUP_RCU flag. 
*/ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * uncached_acl_sentinel(struct task_struct *task) { return (void *)task + 1; } static inline bool is_uncached_acl(struct posix_acl *acl) { return (long)acl & 1; } #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning * of the 'struct inode' */ struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void *i_security; #endif /* Stat data, not accessed from path walking */ unsigned long i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: * * (set|clear|inc|drop)_nlink * inode_(inc|dec)_link_count */ union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; time64_t i_atime_sec; time64_t i_mtime_sec; time64_t i_ctime_sec; u32 i_atime_nsec; u32 i_mtime_nsec; u32 i_ctime_nsec; u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif /* Misc */ u32 i_state; /* 32-bit hole */ struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ /* foreign inode detection, see wbc_detach_inode() */ int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; /* struct files open RO */ #endif union { const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; union { struct list_head i_devices; int i_linklen; }; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; }; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ /* 32-bit hole reserved for expanding i_fsnotify_mask */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void *i_private; /* fs or device private pointer */ } __randomize_layout; static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { VFS_WARN_ON_INODE(strlen(link) != linklen, inode); VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode); inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; } /* 
 * Get bit address from inode->i_state to use with wait_var_event()
 * infrastructure.
 */
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))

struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit);

static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
{
	/* Caller is responsible for correct memory barriers. */
	wake_up_var(inode_state_wait_address(inode, bit));
}

struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);

static inline unsigned int i_blocksize(const struct inode *node)
{
	return (1 << node->i_blkbits);
}

static inline int inode_unhashed(struct inode *inode)
{
	return hlist_unhashed(&inode->i_hash);
}

/*
 * __mark_inode_dirty expects inodes to be hashed. Since we don't
 * want special inodes in the fileset inode space, we make them
 * appear hashed, but do not put on any lists. hlist_del()
 * will work fine and require no locking.
 */
static inline void inode_fake_hash(struct inode *inode)
{
	hlist_add_fake(&inode->i_hash);
}

/*
 * inode->i_mutex nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: second non-directory
 * 5: second parent (when locking independent directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
 *
 * The locking order between these classes is
 * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
 */
enum inode_i_mutex_lock_class {
	I_MUTEX_NORMAL,
	I_MUTEX_PARENT,
	I_MUTEX_CHILD,
	I_MUTEX_XATTR,
	I_MUTEX_NONDIR2,
	I_MUTEX_PARENT2,
};

static inline void inode_lock(struct inode *inode)
{
	down_write(&inode->i_rwsem);
}

static inline __must_check int inode_lock_killable(struct inode *inode)
{
	return down_write_killable(&inode->i_rwsem);
}

static inline void inode_unlock(struct inode *inode)
{
	up_write(&inode->i_rwsem);
}

static inline void inode_lock_shared(struct inode *inode)
{
	down_read(&inode->i_rwsem);
}

static inline __must_check int inode_lock_shared_killable(struct inode *inode)
{
	return down_read_killable(&inode->i_rwsem);
}

static inline void inode_unlock_shared(struct inode *inode)
{
	up_read(&inode->i_rwsem);
}

static inline int inode_trylock(struct inode *inode)
{
	return down_write_trylock(&inode->i_rwsem);
}

static inline int inode_trylock_shared(struct inode *inode)
{
	return down_read_trylock(&inode->i_rwsem);
}

static inline int inode_is_locked(struct inode *inode)
{
	return rwsem_is_locked(&inode->i_rwsem);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
	down_write_nested(&inode->i_rwsem, subclass);
}

static inline void inode_lock_shared_nested(struct inode *inode,
					    unsigned subclass)
{
	down_read_nested(&inode->i_rwsem, subclass);
}

static inline void filemap_invalidate_lock(struct address_space *mapping)
{
	down_write(&mapping->invalidate_lock);
}

static inline void filemap_invalidate_unlock(struct address_space *mapping)
{
	up_write(&mapping->invalidate_lock);
}

static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
{
	down_read(&mapping->invalidate_lock);
}

static inline int filemap_invalidate_trylock_shared(
					struct address_space *mapping)
{
	return down_read_trylock(&mapping->invalidate_lock);
}

static inline void filemap_invalidate_unlock_shared(
					struct address_space *mapping)
{
	up_read(&mapping->invalidate_lock);
}

void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);

void filemap_invalidate_lock_two(struct address_space *mapping1,
				 struct address_space *mapping2);
void filemap_invalidate_unlock_two(struct address_space *mapping1,
				   struct address_space *mapping2);
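/*
 * Illustrative sketch (the helpers below are hypothetical, not part of this
 * header): how code taking two related inodes might use the lockdep
 * subclasses above to express the documented parent -> child ordering.
 */
#ifdef EXAMPLE_LOCK_ORDER_SKETCH
static inline void example_lock_parent_child(struct inode *dir,
					     struct inode *child)
{
	/* Parent first, annotated with the I_MUTEX_PARENT subclass... */
	inode_lock_nested(dir, I_MUTEX_PARENT);
	/* ...then the child/target, so lockdep sees the documented order. */
	inode_lock_nested(child, I_MUTEX_CHILD);
}

static inline void example_unlock_parent_child(struct inode *dir,
					       struct inode *child)
{
	inode_unlock(child);
	inode_unlock(dir);
}
#endif /* EXAMPLE_LOCK_ORDER_SKETCH */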
/*
 * NOTE: in a 32bit arch with a preemptible kernel and
 * an UP compile the i_size_read/write must be atomic
 * with respect to the local cpu (unlike with preempt disabled),
 * but they don't need to be atomic with respect to other cpus like in
 * true SMP (so they need either to locally disable irq around
 * the read or for example on x86 they can still be implemented as a
 * cmpxchg8b without the need of the lock prefix). For SMP compiles
 * and 64bit archs it makes no difference if preempt is enabled or not.
 */
static inline loff_t i_size_read(const struct inode *inode)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	loff_t i_size;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&inode->i_size_seqcount);
		i_size = inode->i_size;
	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
	return i_size;
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	loff_t i_size;

	preempt_disable();
	i_size = inode->i_size;
	preempt_enable();
	return i_size;
#else
	/* Pairs with smp_store_release() in i_size_write() */
	return smp_load_acquire(&inode->i_size);
#endif
}

/*
 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
 * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount
 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
 */
static inline void i_size_write(struct inode *inode, loff_t i_size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	preempt_disable();
	write_seqcount_begin(&inode->i_size_seqcount);
	inode->i_size = i_size;
	write_seqcount_end(&inode->i_size_seqcount);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	preempt_disable();
	inode->i_size = i_size;
	preempt_enable();
#else
	/*
	 * Pairs with smp_load_acquire() in i_size_read() to ensure
	 * changes related to inode size (such as page contents) are
	 * visible before we see the changed inode size.
	 */
	smp_store_release(&inode->i_size, i_size);
#endif
}

static inline unsigned iminor(const struct inode *inode)
{
	return MINOR(inode->i_rdev);
}

static inline unsigned imajor(const struct inode *inode)
{
	return MAJOR(inode->i_rdev);
}

struct fown_struct {
	struct file *file;	/* backpointer for security modules */
	rwlock_t lock;		/* protects pid, uid, euid fields */
	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
	kuid_t uid, euid;	/* uid/euid of process setting the owner */
	int signum;		/* posix.1b rt signal to be delivered on IO */
};
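/*
 * Illustrative sketch (hypothetical helper): growing a file's size after a
 * successful write, with the serialization the i_size_write() comment above
 * asks for.
 */
#ifdef EXAMPLE_ISIZE_SKETCH
static inline void example_extend_size(struct inode *inode, loff_t new_size)
{
	inode_lock(inode);		/* i_rwsem serializes i_size updates */
	if (new_size > i_size_read(inode))
		i_size_write(inode, new_size);
	inode_unlock(inode);
}
#endif /* EXAMPLE_ISIZE_SKETCH */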
/**
 * struct file_ra_state - Track a file's readahead state.
 * @start: Where the most recent readahead started.
 * @size: Number of pages read in the most recent readahead.
 * @async_size: Number of pages that were/are not needed immediately
 *	and so were/are genuinely "ahead". Start next readahead when
 *	the first of these pages is accessed.
 * @ra_pages: Maximum size of a readahead request, copied from the bdi.
 * @mmap_miss: How many mmap accesses missed in the page cache.
 * @prev_pos: The last byte in the most recent read request.
 *
 * When this structure is passed to ->readahead(), the "most recent"
 * readahead means the current readahead.
 */
struct file_ra_state {
	pgoff_t start;
	unsigned int size;
	unsigned int async_size;
	unsigned int ra_pages;
	unsigned int mmap_miss;
	loff_t prev_pos;
};

/*
 * Check if @index falls in the readahead windows.
 */
static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
{
	return (index >= ra->start &&
		index < ra->start + ra->size);
}

/**
 * struct file - Represents a file
 * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
 * @f_mode: FMODE_* flags often used in hotpaths
 * @f_op: file operations
 * @f_mapping: Contents of a cacheable, mappable object.
 * @private_data: filesystem or driver specific data
 * @f_inode: cached inode
 * @f_flags: file flags
 * @f_iocb_flags: iocb flags
 * @f_cred: stashed credentials of creator/opener
 * @f_owner: file owner
 * @f_path: path of the file
 * @f_pos_lock: lock protecting file position
 * @f_pipe: specific to pipes
 * @f_pos: file position
 * @f_security: LSM security context of this file
 * @f_wb_err: writeback error
 * @f_sb_err: per sb writeback errors
 * @f_ep: link of all epoll hooks for this file
 * @f_task_work: task work entry point
 * @f_llist: work queue entrypoint
 * @f_ra: file's readahead state
 * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
 * @f_ref: reference count
 */
struct file {
	spinlock_t f_lock;
	fmode_t f_mode;
	const struct file_operations *f_op;
	struct address_space *f_mapping;
	void *private_data;
	struct inode *f_inode;
	unsigned int f_flags;
	unsigned int f_iocb_flags;
	const struct cred *f_cred;
	struct fown_struct *f_owner;
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct path f_path;
	union {
		/* regular files (with FMODE_ATOMIC_POS) and directories */
		struct mutex f_pos_lock;
		/* pipes */
		u64 f_pipe;
	};
	loff_t f_pos;
#ifdef CONFIG_SECURITY
	void *f_security;
#endif
	/* --- cacheline 2 boundary (128 bytes) --- */
	errseq_t f_wb_err;
	errseq_t f_sb_err;
#ifdef CONFIG_EPOLL
	struct hlist_head *f_ep;
#endif
	union {
		struct callback_head f_task_work;
		struct llist_node f_llist;
		struct file_ra_state f_ra;
		freeptr_t f_freeptr;
	};
	file_ref_t f_ref;
	/* --- cacheline 3 boundary (192 bytes) --- */
} __randomize_layout
  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */

struct file_handle {
	__u32 handle_bytes;
	int handle_type;
	/* file identifier */
	unsigned char f_handle[] __counted_by(handle_bytes);
};

static inline struct file *get_file(struct file *f)
{
	file_ref_inc(&f->f_ref);
	return f;
}

struct file *get_file_rcu(struct file __rcu **f);
struct file *get_file_active(struct file **f);

#define file_count(f)	file_ref_read(&(f)->f_ref)

#define	MAX_NON_LFS	((1UL<<31) - 1)
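/*
 * Illustrative sketch (hypothetical helper): pinning a struct file with
 * get_file() above. The matching release is fput(), declared further down
 * in this header.
 */
#ifdef EXAMPLE_FILE_REF_SKETCH
static inline void example_with_extra_ref(struct file *f)
{
	struct file *held = get_file(f);	/* bumps f_ref */

	/* ... use @held, possibly after the caller's reference is gone ... */

	fput(held);				/* drops our reference */
}
#endif /* EXAMPLE_FILE_REF_SKETCH */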
*/ #if BITS_PER_LONG==32 #define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; struct file_lock; struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define OFFSET_MAX type_max(loff_t) #define OFFT_OFFSET_MAX type_max(off_t) #endif int file_f_owner_allocate(struct file *file); static inline struct fown_struct *file_f_owner(const struct file *file) { return READ_ONCE(file->f_owner); } extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) { return f->f_inode; } /* * file_dentry() is a relic from the days that overlayfs was using files with a * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. * In those days, file_dentry() was needed to get the underlying fs dentry that * matches f_inode. * Files with "fake" path should not exist nowadays, so use an assertion to make * sure that file_dentry() was not papering over filesystem bugs. */ static inline struct dentry *file_dentry(const struct file *file) { struct dentry *dentry = file->f_path.dentry; WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); return dentry; } struct fasync_struct { rwlock_t fa_lock; int magic; int fa_fd; struct fasync_struct *fa_next; /* singly linked list */ struct file *fa_file; struct rcu_head fa_rcu; }; #define FASYNC_MAGIC 0x4601 /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); extern int fasync_remove_entry(struct file *, struct fasync_struct **); extern struct fasync_struct *fasync_alloc(void); extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where * represented in both. */ #define SB_RDONLY BIT(0) /* Mount read-only */ #define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */ #define SB_NODEV BIT(2) /* Disallow access to device special files */ #define SB_NOEXEC BIT(3) /* Disallow program execution */ #define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */ #define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */ #define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */ #define SB_NOATIME BIT(10) /* Do not update access times. 
*/ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) #define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) #define SB_ACTIVE BIT(30) #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ #define SB_ENC_STRICT_MODE_FL (1 << 0) #define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) #if IS_ENABLED(CONFIG_UNICODE) #define sb_no_casefold_compat_fallback(sb) \ (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL) #else #define sb_no_casefold_compat_fallback(sb) (1) #endif /* * Umount options */ #define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ #define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 #define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ #define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop * internal threads if needed) */ SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ }; #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests?
*/ const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_iflags; /* internal SB_I_* flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; int s_count; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif #if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; unsigned int s_quota_types; /* Bitmask of supported quota types */ struct quota_info s_dquot; /* Diskquota specific options */ struct sb_writers s_writers; /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and * s_fsnotify_info together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ /* Granularity of c/m/atime in ns (cannot be worse than a second) */ u32 s_time_gran; /* Time limits for c/m/atime in seconds */ time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; #endif /* * q: why are s_id and s_sysfs_name not the same? both are human * readable strings that identify the filesystem * a: s_id is allowed to change at runtime; it's used in log messages, * and we want to allow that, e.g. when a device starts out as a single * device (s_id is the dev name) but then another device is hot added * and we have to switch to identifying it by UUID * but s_sysfs_name is a handle for programmatic access, and can't * change at runtime */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ struct mutex s_vfs_rename_mutex; /* Kludge */ /* * Filesystem subtype.
If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ const char *s_subtype; const struct dentry_operations *s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ errseq_t s_wb_err; /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; struct hlist_head s_pins; /* * Owning user namespace and default context in which to * interpret filesystem uids, gids, quotas, device nodes, * xattrs and security labels. */ struct user_namespace *s_user_ns; /* * The list_lru structure is essentially just a pointer to a table * of per-node lru lists, each of which has its own spinlock. * There is no need to put them into separate cachelines. */ struct list_lru s_dentry_lru; struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; struct mutex s_sync_lock; /* sync serialisation lock */ /* * Indicates how deep in a filesystem stack this SB is */ int s_stack_depth; /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; static inline struct user_namespace *i_user_ns(const struct inode *inode) { return inode->i_sb->s_user_ns; } /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored * in the filesystem. */ static inline uid_t i_uid_read(const struct inode *inode) { return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: the inode's i_uid mapped down according to @idmap. * If the inode's i_uid has no mapping INVALID_VFSUID is returned. */ static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid); } /** * i_uid_needs_update - check whether inode's i_uid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the @inode's i_uid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_uid field needs to be updated, false if not.
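 *
 * Illustrative note (our addition, not part of the original doc): a
 * chown-style setattr path typically consults i_uid_needs_update() when
 * deciding whether ownership permission checks are required, and later
 * calls i_uid_update() to translate attr->ia_vfsuid through the idmap and
 * commit it to the inode.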
*/ static inline bool i_uid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_UID) && !vfsuid_eq(attr->ia_vfsuid, i_uid_into_vfsuid(idmap, inode))); } /** * i_uid_update - update @inode's i_uid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_uid field translating the vfsuid of any idmapped * mount into the filesystem kuid. */ static inline void i_uid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_UID) inode->i_uid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); } /** * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: the inode's i_gid mapped down according to @idmap. * If the inode's i_gid has no mapping INVALID_VFSGID is returned. */ static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } /** * i_gid_needs_update - check whether inode's i_gid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the @inode's i_gid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_gid field needs to be updated, false if not. */ static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, i_gid_into_vfsgid(idmap, inode))); } /** * i_gid_update - update @inode's i_gid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_gid field translating the vfsgid of any idmapped * mount into the filesystem kgid. */ static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_GID) inode->i_gid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); } /** * inode_fsuid_set - initialize inode's i_uid field with the caller's fsuid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_uid field of @inode. If the inode was found/created via * an idmapped mount, map the caller's fsuid according to @idmap. */ static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** * inode_fsgid_set - initialize inode's i_gid field with the caller's fsgid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_gid field of @inode. If the inode was found/created via * an idmapped mount, map the caller's fsgid according to @idmap. */ static inline void inode_fsgid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in * @idmap: idmap of the relevant mount * * Check whether the caller's fsuid and fsgid have a valid mapping in the * s_user_ns of the superblock @sb. If the caller is on an idmapped mount, map * the caller's fsuid and fsgid according to the @idmap first.
* * Return: true if fsuid and fsgid are mapped, false if not. */ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; kuid_t kuid; kgid_t kgid; kuid = mapped_fsuid(idmap, fs_userns); if (!uid_valid(kuid)) return false; kgid = mapped_fsgid(idmap, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && kgid_has_mapping(fs_userns, kgid); } struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update); static inline time64_t inode_get_atime_sec(const struct inode *inode) { return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), .tv_nsec = inode_get_atime_nsec(inode) }; return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { inode->i_atime_sec = ts.tv_sec; inode->i_atime_nsec = ts.tv_nsec; return ts; } static inline struct timespec64 inode_set_atime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { return inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), .tv_nsec = inode_get_mtime_nsec(inode) }; return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { inode->i_mtime_sec = ts.tv_sec; inode->i_mtime_nsec = ts.tv_nsec; return ts; } static inline struct timespec64 inode_set_mtime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_mtime_to_ts(inode, ts); } /* * Multigrain timestamps * * Conditionally use fine-grained ctime and mtime timestamps when there * are users actively observing them via getattr. The primary use-case * for this is NFS clients that use the ctime to distinguish between * different states of the file, and that are often fooled by multiple * operations that occur in the same coarse-grained timer tick.
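 *
 * For example (our illustration): the "ctime has been queried" state is
 * stashed in bit 31 of i_ctime_nsec (I_CTIME_QUERIED below), which is safe
 * because a valid tv_nsec value needs fewer than 31 bits;
 * inode_get_ctime_nsec() masks the flag off so readers only ever see the
 * real nanosecond value.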
*/ #define I_CTIME_QUERIED ((u32)BIT(31)) static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { return inode->i_ctime_nsec & ~I_CTIME_QUERIED; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), .tv_nsec = inode_get_ctime_nsec(inode) }; return ts; } struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts); /** * inode_set_ctime - set the ctime in the inode * @inode: inode in which to set the ctime * @sec: tv_sec value to set * @nsec: tv_nsec value to set * * Set the ctime in @inode to { @sec, @nsec } */ static inline struct timespec64 inode_set_ctime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_ctime_to_ts(inode, ts); } struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. */ /* * These are internal functions, please use sb_start_{write,pagefault,intwrite} * instead. */ static inline void __sb_end_write(struct super_block *sb, int level) { percpu_up_read(sb->s_writers.rw_sem + level-1); } static inline void __sb_start_write(struct super_block *sb, int level) { percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) { return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); } #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held * @sb: the super we write to * @level: the freeze level * * * > 0 - sb freeze level is held * * 0 - sb freeze level is not held * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN */ static inline int __sb_write_started(const struct super_block *sb, int level) { return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); } /** * sb_write_started - check if SB_FREEZE_WRITE is held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE); } /** * sb_write_not_started - check if SB_FREEZE_WRITE is not held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_not_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; } /** * file_write_started - check if SB_FREEZE_WRITE is held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. */ static inline bool file_write_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_started(file_inode(file)->i_sb); } /** * file_write_not_started - check if SB_FREEZE_WRITE is not held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. 
*/ static inline bool file_write_not_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_not_started(file_inode(file)->i_sb); } /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to * * Decrement number of writers to the filesystem. Wake up possible waiters * wanting to freeze the filesystem. */ static inline void sb_end_write(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_WRITE); } /** * sb_end_pagefault - drop write access to a superblock from a page fault * @sb: the super we wrote to * * Decrement number of processes handling write page fault to the filesystem. * Wake up possible waiters wanting to freeze the filesystem. */ static inline void sb_end_pagefault(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_end_intwrite - drop write access to a superblock for internal fs purposes * @sb: the super we wrote to * * Decrement fs-internal number of writers to the filesystem. Wake up possible * waiters wanting to freeze the filesystem. */ static inline void sb_end_intwrite(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_FS); } /** * sb_start_write - get write access to a superblock * @sb: the super we write to * * When a process wants to write data or metadata to a file system (i.e. dirty * a page or an inode), it should embed the operation in a sb_start_write() - * sb_end_write() pair to get exclusion against file system freezing. This * function increments number of writers preventing freezing. If the file * system is already frozen, the function waits until the file system is * thawed. * * Since freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. Generally, * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write * -> i_mutex (write path, truncate, directory ops, ...) * -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_WRITE); } static inline bool sb_start_write_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** * sb_start_pagefault - get write access to a superblock from a page fault * @sb: the super we write to * * When a process starts handling write page fault, it should embed the * operation into sb_start_pagefault() - sb_end_pagefault() pair to get * exclusion against file system freezing. This is needed since the page fault * is going to dirty a page. This function increments number of running page * faults preventing freezing. If the file system is already frozen, the * function waits until the file system is thawed. * * Since page fault freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. It is advised to * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault * handling code implies lock dependency: * * mmap_lock * -> sb_start_pagefault */ static inline void sb_start_pagefault(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_start_intwrite - get write access to a superblock for internal fs purposes * @sb: the super we write to * * This is the third level of protection against filesystem freezing. It is * free for use by a filesystem. The only requirement is that it must rank * below sb_start_pagefault. 
* * For example, a filesystem can call sb_start_intwrite() when starting a * transaction, which somewhat eases handling of freezing for internal sources * of filesystem changes (internal fs threads, discarding preallocation on file * close, etc.). */ static inline void sb_start_intwrite(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_FS); } static inline bool sb_start_intwrite_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_FS); } bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode); /* * VFS helper functions. */ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); /** * struct renamedata - contains all information required for renaming * @old_mnt_idmap: idmap of the old mount the inode was found from * @old_dir: parent of source * @old_dentry: source * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_dir: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { struct mnt_idmap *old_mnt_idmap; struct inode *old_dir; struct dentry *old_dentry; struct mnt_idmap *new_mnt_idmap; struct inode *new_dir; struct dentry *new_dentry; struct inode **delegated_inode; unsigned int flags; } __randomize_layout; int vfs_rename(struct renamedata *); static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); } struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred); struct file *kernel_file_open(const struct path *path, int flags, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #else #define compat_ptr_ioctl NULL #endif /* * VFS file helper functions. */ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. * Return 'true' to keep going and 'false' if there are no more entries.
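 *
 * Minimal actor sketch (our illustration, not from the original header);
 * it embeds the dir_context as its first member, counts entries, and stops
 * iteration after 16 of them by returning false:
 *
 *	struct count_ctx { struct dir_context ctx; int seen; };
 *
 *	static bool count_actor(struct dir_context *ctx, const char *name,
 *				int len, loff_t pos, u64 ino, unsigned type)
 *	{
 *		struct count_ctx *c = container_of(ctx, struct count_ctx, ctx);
 *		return ++c->seen < 16;
 *	}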
*/ struct dir_context; typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; loff_t pos; /* * Filesystems MUST NOT MODIFY count, but may use as a hint: * 0 unknown * > 0 space in buffer (assume at least one entry) * INT_MAX unlimited */ int count; }; /* If OR-ed with d_type, pending signals are not checked */ #define FILLDIR_FLAG_NOINTR 0x1000 /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. * * NOMMU_MAP_COPY: Copy can be mapped (MAP_PRIVATE) * NOMMU_MAP_DIRECT: Can be mapped directly (MAP_SHARED) * NOMMU_MAP_READ: Can be mapped for reading * NOMMU_MAP_WRITE: Can be mapped for writing * NOMMU_MAP_EXEC: Can be mapped for execution */ #define NOMMU_MAP_COPY 0x00000001 #define NOMMU_MAP_DIRECT 0x00000008 #define NOMMU_MAP_READ VM_MAYREAD #define NOMMU_MAP_WRITE VM_MAYWRITE #define NOMMU_MAP_EXEC VM_MAYEXEC #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) /* * These flags control the behavior of the remap_file_range function pointer. * If it is called with len == 0 that means "remap to end of source file". * See Documentation/filesystems/vfs.rst for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request */ #define REMAP_FILE_DEDUP (1 << 0) #define REMAP_FILE_CAN_SHORTEN (1 << 1) /* * These flags signal that the caller is ok with altering various aspects of * the behavior of the remap operation. The changes must be made by the * implementation; the vfs remap helper functions can take advantage of them. * Flags in this category exist to preserve the quirky behavior of the hoisted * btrfs clone/dedupe ioctls. */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) /* * These flags control the behavior of vfs_copy_file_range(). * They are not available to the user via syscall. 
* * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops */ #define COPY_FILE_SPLICE (1 << 0) struct iov_iter; struct io_uring_cmd; struct offset_ctx; typedef unsigned int __bitwise fop_flags_t; struct file_operations { struct module *owner; fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, unsigned int flags); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); int (*mmap_prepare)(struct vm_area_desc *); } __randomize_layout; /* Supports async buffered reads */ #define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) /* Supports async buffered writes */ #define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1)) /* Supports synchronous page faults for mappings */ #define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) /* Supports non-exclusive O_DIRECT writes from multiple threads */ #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) /* Treat loff_t as unsigned (e.g., /dev/mem) */ #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ #define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); #define WRAP_DIR_ITER(x) \ static int shared_##x(struct file 
*file , struct dir_context *ctx) \ { return wrap_directory_iterator(file, ctx, x); } struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct mnt_idmap *, struct inode *, int); struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); int (*create) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t); struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int); int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; /* Did the driver provide valid mmap hook configuration? */ static inline bool file_has_valid_mmap_hooks(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; /* Hooks are mutually exclusive. 
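 * A driver must therefore supply either ->mmap or ->mmap_prepare, never
 * both; call_mmap() below routes ->mmap_prepare implementations through
 * compat_vma_mmap_prepare() so existing vma-based callers keep working.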
*/ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) return false; if (!has_mmap && !has_mmap_prepare) return false; return true; } int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { if (file->f_op->mmap_prepare) return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } static inline int __call_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write); int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *dax_read_ops); int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); /** * enum freeze_holder - holder of the freeze * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the * same holder aren't allowed. It is however allowed to hold a single * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at * the same time. This is relied upon by some filesystems during online * repair or similar. 
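 *
 * Hedged usage sketch (our addition, not from the original doc): a
 * kernel-internal freezer that must tolerate a concurrent userspace freeze
 * of the same superblock could pair the calls as
 *
 *	err = freeze_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_MAY_NEST, NULL);
 *	if (!err)
 *		err = thaw_super(sb, FREEZE_HOLDER_KERNEL | FREEZE_MAY_NEST, NULL);
 *
 * passing the same holder flags to both calls so the freeze reference is
 * taken and dropped against the same counter.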
*/ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), FREEZE_EXCL = (1U << 3), }; struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); void (*shutdown)(struct super_block *sb); }; /* * Inode flags - they have no relation to superblock flags now */ #define S_SYNC (1 << 0) /* Writes are synced at once */ #define S_NOATIME (1 << 1) /* Do not update access times */ #define S_APPEND (1 << 2) /* Append-only file */ #define S_IMMUTABLE (1 << 3) /* Immutable file */ #define S_DEAD (1 << 4) /* removed, but still open directory */ #define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ #define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ #define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ #define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE (1 << 9) /* Inode is fs-internal */ #define S_IMA (1 << 10) /* Inode has an associated IMA struct */ #define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ #define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX #define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */ #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (e.g. fs/cachefiles) */ #define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system * flags just means all the inodes inherit those flags by default. It might be * possible to override it selectively if you really wanted to with some * ioctl() that is not currently implemented. * * Exception: SB_RDONLY is always applied to the entire file system. * * Unfortunately, it is possible to change a filesystem's flags while it is * mounted and files are in use. This means that not all of the inodes will * have their i_flags updated.
Hence, i_flags no longer inherit the superblock mount * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY|SB_NOATIME) #define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) #ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) #else #define IS_POSIXACL(inode) 0 #endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #ifdef CONFIG_SWAP #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #else #define IS_SWAPFILE(inode) ((void)(inode), 0U) #endif #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) #define IS_VERITY(inode) ((inode)->i_flags & S_VERITY) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) #define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)); } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = kiocb_src->ki_flags, .ki_ioprio = kiocb_src->ki_ioprio, .ki_pos = kiocb_src->ki_pos, }; } /* * Inode state bits. Protected by inode->i_lock * * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at * various stages of removing an inode. * * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on * fdatasync() (unless I_DIRTY_DATASYNC is also set). * Timestamp updates are the usual cause. * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of * these changes separately from I_DIRTY_SYNC so that we * don't have to write inode on fdatasync() when only * e.g. the timestamps have changed. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. * I_DIRTY_TIME The inode itself has dirty timestamps, and the * lazytime mount option is enabled. 
We keep track of this * separately from I_DIRTY_SYNC in order to implement * lazytime. This gets cleared if I_DIRTY_INODE * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already * in place because writeback might already be in progress * and we don't want to lose the time update. * I_NEW Serves as both a mutex and completion notification. * New inodes set I_NEW. If two processes both create * the same inode, one of them will release its inode and * wait for I_NEW to be released before returning. * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can * also cause waiting on I_NEW, without I_NEW actually * being set. find_inode() uses this to prevent returning * nearly-dead inodes. * I_WILL_FREE Must be set when calling write_inode_now() if i_count * is zero. I_FREEING must be set when I_WILL_FREE is * cleared. * I_FREEING Set when inode is about to be freed but still has dirty * pages or buffers attached or the inode itself is still * dirty. * I_CLEAR Added by clear_inode(). In this state the inode is * clean and can be destroyed. Inode keeps I_FREEING. * * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are * prohibited for many purposes. iget() must wait for * the inode to be completely released, then create it * anew. Other functions will just ignore such inodes, * if appropriate. I_NEW is used for waiting. * * I_SYNC Writeback of inode is running. The bit is set during * data writeback, and cleared with a wakeup on the bit * address once it is done. The bit is also used to pin * the inode in memory for flusher thread. * * I_REFERENCED Marks the inode as recently referenced on the LRU list. * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See * inode_switch_wbs_work_fn() for details. * * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * and work dirs among overlayfs mounts. * * I_CREATING New object's inode in the middle of setting up. * * I_DONTCACHE Evict inode as soon as it is not used anymore. * * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * * I_LRU_ISOLATING Inode is pinned while being isolated from the LRU * without holding i_count. * * Q: What is the difference between I_WILL_FREE and I_FREEING? * * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait * upon. There's one free address left.
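 *
 * Worked example (our addition): under lazytime, a bare atime update marks
 * the inode I_DIRTY_TIME only, so no inode writeback is scheduled for it
 * yet; if a later chmod dirties the inode proper (I_DIRTY_SYNC), the
 * pending timestamp dirtiness is folded into the regular dirty state and
 * written back together with it.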
*/ #define __I_NEW 0 #define I_NEW (1 << __I_NEW) #define __I_SYNC 1 #define I_SYNC (1 << __I_SYNC) #define __I_LRU_ISOLATING 2 #define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_SYNC (1 << 3) #define I_DIRTY_DATASYNC (1 << 4) #define I_DIRTY_PAGES (1 << 5) #define I_WILL_FREE (1 << 6) #define I_FREEING (1 << 7) #define I_CLEAR (1 << 8) #define I_REFERENCED (1 << 9) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 12) #define I_OVL_INUSE (1 << 13) #define I_CREATING (1 << 14) #define I_DONTCACHE (1 << 15) #define I_SYNC_QUEUED (1 << 16) #define I_PINNING_NETFS_WB (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY); } static inline void mark_inode_dirty_sync(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY_SYNC); } /* * Returns true if the given inode itself only has dirty timestamps (its pages * may still be dirty) and isn't currently being allocated or freed. * Filesystems should call this if, when writing an inode while lazytime is * enabled, they want to opportunistically write the timestamps of other inodes * located very nearby on-disk, e.g. in the same inode block. This returns true * if the given inode is in need of such an opportunistic update. Requires * i_lock, or at least later re-checking under i_lock. */ static inline bool inode_is_dirtytime_only(struct inode *inode) { return (inode->i_state & (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME; } extern void inc_nlink(struct inode *inode); extern void drop_nlink(struct inode *inode); extern void clear_nlink(struct inode *inode); extern void set_nlink(struct inode *inode, unsigned int nlink); static inline void inode_inc_link_count(struct inode *inode) { inc_nlink(inode); mark_inode_dirty(inode); } static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); mark_inode_dirty(inode); } enum file_time_flags { S_ATIME = 1, S_MTIME = 2, S_CTIME = 4, S_VERSION = 8, }; extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) touch_atime(&file->f_path); } extern int file_modified(struct file *file); int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_LBS 128 /* FS supports LBS */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally.
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct hlist_head fs_supers; struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; struct lock_class_key invalidate_lock_key; struct lock_class_key i_mutex_dir_key; }; #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) /** * is_mgtime: is this inode using multigrain timestamps * @inode: inode to test for multigrain timestamps * * Return true if the inode uses multigrain timestamps, false otherwise. */ static inline bool is_mgtime(const struct inode *inode) { return inode->i_opflags & IOP_MGTIME; } extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int set_anon_super_fc(struct super_block *s, struct fs_context *fc); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) ({ \ const struct file_operations *_fops = (fops); \ (((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL)); \ }) #define fops_put(fops) ({ \ const struct file_operations *_fops = (fops); \ if (_fops) \ module_put((_fops)->owner); \ }) /* * This one is to be used *ONLY* from ->open() instances. * fops must be non-NULL, pinned down *and* module dependencies * should be sufficient to pin the caller down as well. 
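 *
 * Illustrative ->open() sketch (ours; real_fops is a hypothetical
 * pre-registered file_operations table, in the style of the misc/char
 * demultiplexers):
 *
 *	static int demux_open(struct inode *inode, struct file *file)
 *	{
 *		const struct file_operations *fops = fops_get(real_fops);
 *
 *		if (!fops)
 *			return -ENODEV;
 *		replace_fops(file, fops);
 *		return file->f_op->open ? file->f_op->open(inode, file) : 0;
 *	}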
*/ #define replace_fops(f, fops) \ do { \ struct file *__file = (f); \ fops_put(__file->f_op); \ BUG_ON(!(__file->f_op = (fops))); \ } while(0) extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); int freeze_super(struct super_block *super, enum freeze_holder who, const void *freeze_owner); int thaw_super(struct super_block *super, enum freeze_holder who, const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) { if (WARN_ON(len > sizeof(sb->s_uuid))) len = sizeof(sb->s_uuid); sb->s_uuid_len = len; memcpy(&sb->s_uuid, uuid, len); } /* set sb sysfs name based on sb->s_bdev */ static inline void super_set_sysfs_name_bdev(struct super_block *sb) { snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); } /* set sb sysfs name based on sb->s_uuid */ static inline void super_set_sysfs_name_uuid(struct super_block *sb) { WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); } /* set sb sysfs name based on sb->s_id */ static inline void super_set_sysfs_name_id(struct super_block *sb) { strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); } /* try to use something standard before you use this */ __printf(2, 3) static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) { va_list args; va_start(args, fmt); vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); va_end(args); } extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); int inode_update_timestamps(struct inode *inode, int flags); int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_MASK) /* fs/open.c */ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * * If @mnt has a non-@nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not.
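 *
 * For example (our note): a mount typically gains a non-trivial idmapping
 * via mount_setattr(2) with MOUNT_ATTR_IDMAP; ordinary mounts carry
 * @nop_mnt_idmap, for which this helper returns false.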
*/ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { return mnt_idmap(mnt) != &nop_mnt_idmap; } int vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); int do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, const char *, int, umode_t); static inline struct file *file_open_root_mnt(struct vfsmount *mnt, const char *name, int flags, umode_t mode) { return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, name, flags, mode); } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file * stored in ->vm_file is a backing file whose f_inode is on the underlying * filesystem. When the mapped file path and inode number are displayed to * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the * path and inode number to display to the user, which is the path of the fd * that user has requested to map and the inode number that would be returned * by fstat() on that same fd. */ /* Get the path to display in /proc/<pid>/maps */ static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } /* Get the inode whose inode number to display in /proc/<pid>/maps */ static inline const struct inode *file_user_inode(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return d_inode(backing_file_user_path(f)->dentry); return file_inode(f); } static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred); } extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); static inline struct filename *getname(const char __user *name) { return getname_flags(name, 0); } extern struct filename *getname_kernel(const char *); extern struct filename *__getname_maybe_null(const char __user *); static inline struct filename *getname_maybe_null(const char __user *name, int flags) { if (!(flags & AT_EMPTY_PATH)) return getname(name); if (!name) return NULL; return __getname_maybe_null(name); } extern void putname(struct filename *name); DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T)) static inline struct filename *refname(struct filename *name) { atomic_inc(&name->refcnt); return name; } extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); /* Helper for the simple case when original dentry is used */ static inline int finish_open_simple(struct file *file, int error) { if (error) return error; return finish_open(file, file->f_path.dentry, NULL); } /* fs/dcache.c */ extern void __init 
vfs_caches_init_early(void); extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) extern struct super_block *blockdev_superblock; static inline bool sb_is_blkdev_sb(struct super_block *sb) { return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 /* Marks the top and bottom of the second segment of free char majors */ #define CHRDEV_MAJOR_DYN_EXT_START 511 #define CHRDEV_MAJOR_DYN_EXT_END 384 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops); extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); static inline int register_chrdev(unsigned int major, const char *name, const struct file_operations *fops) { return __register_chrdev(major, 0, 256, name, fops); } static inline void unregister_chrdev(unsigned int major, const char *name) { __unregister_chrdev(major, 0, 256, name); } extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, loff_t end); static inline int file_write_and_wait(struct file *file) { return file_write_and_wait_range(file, 0, LLONG_MAX); } extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); static inline bool iocb_is_dsync(const struct kiocb *iocb) { return (iocb->ki_flags & IOCB_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host); } /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount * of bytes passed in, or an error if syncing the file failed. */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 
0 : 1); if (ret) return ret; } else if (iocb->ki_flags & IOCB_DONTCACHE) { struct address_space *mapping = iocb->ki_filp->f_mapping; filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, iocb->ki_pos - 1); } return count; } extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK extern int bmap(struct inode *inode, sector_t *block); #else static inline int bmap(struct inode *inode, sector_t *block) { return -EINVAL; } #endif int notify_change(struct mnt_idmap *, struct dentry *, struct iattr *, struct inode **); int inode_permission(struct mnt_idmap *, struct inode *, int); int generic_permission(struct mnt_idmap *, struct inode *, int); static inline int file_permission(struct file *file, int mask) { return inode_permission(file_mnt_idmap(file), file_inode(file), mask); } static inline int path_permission(const struct path *path, int mask) { return inode_permission(mnt_idmap(path->mnt), d_inode(path->dentry), mask); } int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); } static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) { return (inode->i_mode ^ mode) & S_IFMT; } /** * file_start_write - get write access to a superblock for regular file io * @file: the file we want to write to * * This is a variant of sb_start_write() which is a no-op on non-regular files. * Should be matched with a call to file_end_write(). */ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_start_write_trylock(file_inode(file)->i_sb); } /** * file_end_write - drop write access to a superblock of a regular file * @file: the file we wrote to * * Should be matched with a call to file_start_write(). */ static inline void file_end_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; sb_end_write(file_inode(file)->i_sb); } /** * kiocb_start_write - get write access to a superblock for async file io * @iocb: the io context we want to submit the write with * * This is a variant of sb_start_write() for async io submission. * Should be matched with a call to kiocb_end_write(). */ static inline void kiocb_start_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); sb_start_write(inode->i_sb); /* * Fool lockdep by telling it the lock got released so that it * doesn't complain about the held lock when we return to userspace. */ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); } /** * kiocb_end_write - drop write access to a superblock after async file io * @iocb: the io context we submitted the write with * * Should be matched with a call to kiocb_start_write(). */ static inline void kiocb_end_write(struct kiocb *iocb) { struct inode *inode = file_inode(iocb->ki_filp); /* * Tell lockdep we inherited freeze protection from submission thread. */ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); sb_end_write(inode->i_sb); } /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap * read-write shared) accesses. * * get_write_access() gets write permission for a file.
* put_write_access() releases this write permission. * deny_write_access() denies write access to a file. * allow_write_access() re-enables write access to a file. * * The i_writecount field of an inode can have the following values: * 0: no write access, no denied write access * < 0: (-i_writecount) users that denied write access to the file. * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if the sign is wrong. */ static inline int get_write_access(struct inode *inode) { return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline int deny_write_access(struct file *file) { struct inode *inode = file_inode(file); return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline void put_write_access(struct inode * inode) { atomic_dec(&inode->i_writecount); } static inline void allow_write_access(struct file *file) { if (file) atomic_inc(&file_inode(file)->i_writecount); } /* * Do not prevent writes to an executable file when it is watched by * pre-content events. * * Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at * the time of file open and remains constant for the entire lifetime of the * file, so if pre-content watches are added post execution or removed before * the end of the execution, it will not cause an i_writecount reference leak. */ static inline int exe_file_deny_write_access(struct file *exe_file) { if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode))) return 0; return deny_write_access(exe_file); } static inline void exe_file_allow_write_access(struct file *exe_file) { if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode))) return; allow_write_access(exe_file); } static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) { file->f_mode &= ~FMODE_FSNOTIFY_MASK; file->f_mode |= mode; } static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; } #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { atomic_inc(&inode->i_readcount); } #else static inline void i_readcount_dec(struct inode *inode) { return; } static inline void i_readcount_inc(struct inode *inode) { return; } #endif extern int do_pipe_flags(int *, int); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); /** * is_dot_dotdot - returns true only if @name is "." or ".."
* @name: file name to check * @len: length of file name, in bytes */ static inline bool is_dot_dotdot(const char *name, size_t len) { return len && unlikely(name[0] == '.') && (len == 1 || (len == 2 && name[1] == '.')); } #include <linux/err.h> /* needed for stackable file system support */ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t); static inline int inode_init_always(struct super_block *sb, struct inode *inode) { return inode_init_always_gfp(sb, inode, GFP_NOFS); } extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); struct inode *iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); struct inode *iget5_locked_rcu(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, int (*match)(struct inode *, unsigned long, void *), void *data); extern struct inode *find_inode_rcu(struct super_block *, unsigned long, int (*)(struct inode *, void *), void *); extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void lockdep_annotate_inode_mutex_key(struct inode *inode); #else static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* * Userspace may rely on the inode number being non-zero. For example, glibc * simply ignores files with zero i_ino in unlink() and other places. * * As an additional complication, if userspace was compiled with * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the * lower 32 bits, so we need to check that those aren't zero explicitly. With * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but * better safe than sorry. 
*/ static inline bool is_zero_ino(ino_t ino) { return (u32)ino == 0; } /* * inode->i_lock must be held */ static inline void __iget(struct inode *inode) { atomic_inc(&inode->i_count); } extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); struct inode *alloc_inode(struct super_block *sb); static inline struct inode *new_inode_pseudo(struct super_block *sb) { return alloc_inode(sb); } extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp) extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } extern void inode_sb_list_add(struct inode *inode); extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to, ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, struct iov_iter *iter); ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, 
loff_t offset, int whence); extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { /* need locking between buffered and direct access */ DIO_LOCKING = 0x01, /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, }; ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, dio_iodone_t end_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif bool inode_dio_finished(const struct inode *inode); void inode_dio_wait(struct inode *inode); void inode_dio_wait_interruptible(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O request * @inode: inode the direct I/O happens on * * This is called before starting a direct I/O request, and is used to * keep track of outstanding direct I/O so that callers of * inode_dio_wait() can wait for all of it to finish. */ static inline void inode_dio_begin(struct inode *inode) { atomic_inc(&inode->i_dio_count); } /** * inode_dio_end - signal finish of a direct I/O request * @inode: inode the direct I/O happens on * * This is called once we've finished processing a direct I/O request, * and is used to wake up callers waiting for direct I/O to be quiesced.
*/ static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) wake_up_var(&inode->i_dio_count); } extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); extern const char *page_get_link_raw(struct dentry *, struct inode *, struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max, unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); static inline loff_t __inode_get_bytes(struct inode *inode) { return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; } loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); const char *simple_get_link(struct dentry *, struct inode *, struct delayed_call *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags); int vfs_fstat(int fd, struct kstat *stat); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); void filesystems_freeze(void); void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int simple_getattr(struct mnt_idmap *, const struct path 
*, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { struct maple_tree mt; unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); extern int 
generic_ci_match(const struct inode *parent, const struct qstr *name, const struct qstr *folded_name, const u8 *de_name, u32 de_name_len); #if IS_ENABLED(CONFIG_UNICODE) int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); /** * generic_ci_validate_strict_name - Check if a given name is suitable * for a directory * * This function checks if the proposed filename is valid for the * parent directory. That means that only valid UTF-8 filenames will be * accepted for casefold directories from filesystems created with the * strict encoding flag. That also means that any name will be * accepted for directories that don't have casefold enabled, or that * aren't strict about the encoding. * * @dir: inode of the directory where the new file will be created * @name: name of the new file * * Return: * * True if the filename is suitable for this directory. This includes * the case where the name would not be suitable for a strict-encoding * directory, but the directory being used isn't strict * * False if the filename isn't suitable for this directory. This only * happens when a directory is casefolded and the filesystem is strict * about its encoding. */ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) { if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) return true; /* * A casefold dir must have an encoding set, unless the filesystem * is corrupted */ if (WARN_ON_ONCE(!dir->i_sb->s_encoding)) return true; return !utf8_validate(dir->i_sb->s_encoding, name); } #else static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) { return true; } #endif static inline bool sb_has_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) return !!sb->s_encoding; #else return false; #endif } int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); void setattr_copy(struct mnt_idmap *, struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); static inline bool vma_is_dax(const struct vm_area_struct *vma) { return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } static inline int iocb_flags(struct file *file) { int res = 0; if (file->f_flags & O_APPEND) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; return res; } static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, int rw_type) { int kiocb_flags = 0; /* make sure there's no overlap between RWF and private IOCB flags */ BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; } if (flags & RWF_ATOMIC) { if (rw_type != WRITE) return -EOPNOTSUPP; if (!(ki->ki_filp->f_mode &
FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } if (flags & RWF_DONTCACHE) { /* file system must support it */ if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE)) return -EOPNOTSUPP; /* DAX mappings not supported */ if (IS_DAX(ki->ki_filp->f_mapping->host)) return -EOPNOTSUPP; } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { if (IS_APPEND(file_inode(ki->ki_filp))) return -EPERM; ki->ki_flags &= ~IOCB_APPEND; } ki->ki_flags |= kiocb_flags; return 0; } /* Transaction based IO helpers */ /* * An argresp is stored in an allocated page and holds the * size of the argument or response, along with its content */ struct simple_transaction_argresp { ssize_t size; char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) char *simple_transaction_get(struct file *file, const char __user *buf, size_t size); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos); int simple_transaction_release(struct inode *inode, struct file *file); void simple_transaction_set(struct file *file, size_t n); /* * simple attribute files * * These attributes behave similarly to those in sysfs: * * Writing to an attribute immediately sets a value; an open file can be * written to multiple times. * * Reading from an attribute creates a buffer from the value that might get * read with multiple read calls. When the attribute has been read * completely, no further read calls are possible until the file is opened * again. * * All attributes contain a text representation of a numeric value * that is accessed with the get() and set() functions. A minimal usage * sketch (with hypothetical names) follows after the end of this header. */ #define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) #define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...)
{ /* don't do anything, just let the compiler check the arguments; */ } int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt); int simple_attr_release(struct inode *inode, struct file *file); ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) static inline bool is_sxid(umode_t mode) { return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { if (!(dir->i_mode & S_ISVTX)) return 0; return __check_sticky(idmap, dir, inode); } static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC)) inode->i_flags |= S_NOSEC; } static inline bool is_root_inode(struct inode *inode) { return inode == inode->i_sb->s_root->d_inode; } static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, ".", 1, ctx->pos, file->f_path.dentry->d_inode->i_ino, DT_DIR); } static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, d_parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) return false; ctx->pos = 1; } if (ctx->pos == 1) { if (!dir_emit_dotdot(file, ctx)) return false; ctx->pos = 2; } return true; } static inline bool dir_relax(struct inode *inode) { inode_unlock(inode); inode_lock(inode); return !IS_DEADDIR(inode); } static inline bool dir_relax_shared(struct inode *inode) { inode_unlock_shared(inode); inode_lock_shared(inode); return !IS_DEADDIR(inode); } extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); /* mm/fadvise.c */ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); static inline bool vfs_empty_path(int dfd, const char __user *path) { char c; if (dfd < 0) return false; /* We now allow NULL to be used for empty path. */ if (!path) return true; if (unlikely(get_user(c, path))) return false; return !c; } int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */
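The "simple attribute files" helpers declared above are self-contained enough to show in use. Below is a minimal sketch of wiring a u64 tunable to DEFINE_SIMPLE_ATTRIBUTE(); the demo_* names and the backing variable are hypothetical, invented for illustration, and are not part of fs.h:

#include <linux/fs.h>
#include <linux/module.h>	/* THIS_MODULE, used inside the macro */
#include <linux/types.h>

static u64 demo_threshold;	/* hypothetical backing value */

/* get() fills *val; @data is the inode's i_private, unused here. */
static int demo_threshold_get(void *data, u64 *val)
{
	*val = demo_threshold;
	return 0;
}

/* set() stores the value parsed from what userspace wrote. */
static int demo_threshold_set(void *data, u64 val)
{
	demo_threshold = val;
	return 0;
}

/*
 * Generates demo_threshold_fops: ->open checks the format string and binds
 * the get/set pair; ->read/->write use the "%llu\n" text representation.
 */
DEFINE_SIMPLE_ATTRIBUTE(demo_threshold_fops, demo_threshold_get,
			demo_threshold_set, "%llu\n");

The resulting demo_threshold_fops can then be handed to whatever creates the file; debugfs_create_file() is a common consumer of this pattern.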
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _DELAYED_CALL_H #define _DELAYED_CALL_H /* * Poor man's closures; I wish we could've done them sanely polymorphic, * but... */ struct delayed_call { void (*fn)(void *); void *arg; }; #define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL} /* I really wish we had closures with sane typechecking... */ static inline void set_delayed_call(struct delayed_call *call, void (*fn)(void *), void *arg) { call->fn = fn; call->arg = arg; } static inline void do_delayed_call(struct delayed_call *call) { if (call->fn) call->fn(call->arg); } static inline void clear_delayed_call(struct delayed_call *call) { call->fn = NULL; } #endif
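delayed_call is typically used to return a value together with the cleanup that must run once the caller is done with it; ->get_link() in fs.h is the in-tree consumer of this pattern. A minimal sketch under that assumption follows; the demo_* names are hypothetical, and linux/slab.h, linux/err.h and linux/printk.h are assumed to be included:

/* Illustrative producer: returns a buffer and schedules its release. */
static void demo_free(void *arg)
{
	kfree(arg);
}

static const char *demo_get_value(struct delayed_call *done)
{
	char *buf = kmalloc(16, GFP_KERNEL);

	if (!buf)
		return ERR_PTR(-ENOMEM);
	strscpy(buf, "hello", 16);
	/* Arrange for demo_free(buf) to run once the caller is finished. */
	set_delayed_call(done, demo_free, buf);
	return buf;
}

/* Illustrative consumer. */
static void demo_use_value(void)
{
	DEFINE_DELAYED_CALL(done);
	const char *val = demo_get_value(&done);

	if (!IS_ERR(val))
		pr_info("value: %s\n", val);
	do_delayed_call(&done);	/* runs demo_free(buf); a no-op on error */
}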
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_trace.h" #include "xfs_sysctl.h" #include "xfs_pwork.h" #include <linux/nmi.h> /* * Parallel Work Queue * =================== * * Abstract away the details of running a large and "obviously" parallelizable * task across multiple CPUs. Callers initialize the pwork control object with * a desired level of parallelization and a work function. Next, they embed * struct xfs_pwork in whatever structure they use to pass work context to a * worker thread and queue that pwork. The work function will be passed the * pwork item when it is run (from process context) and any returned error will * be recorded in xfs_pwork_ctl.error. Work functions should check for errors * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not * stop workqueue item processing. * * This is the rough equivalent of the xfsprogs workqueue code, though we can't * reuse that name here. */ /* Invoke our caller's function. */ static void xfs_pwork_work( struct work_struct *work) { struct xfs_pwork *pwork; struct xfs_pwork_ctl *pctl; int error; pwork = container_of(work, struct xfs_pwork, work); pctl = pwork->pctl; error = pctl->work_fn(pctl->mp, pwork); if (error && !pctl->error) pctl->error = error; if (atomic_dec_and_test(&pctl->nr_work)) wake_up(&pctl->poll_wait); } /* * Set up control data for parallel work. @work_fn is the function that will * be called. @tag will be written into the kernel threads. @nr_threads is * the level of parallelism desired, or 0 for no limit. */ int xfs_pwork_init( struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, xfs_pwork_work_fn work_fn, const char *tag) { unsigned int nr_threads = 0; #ifdef DEBUG if (xfs_globals.pwork_threads >= 0) nr_threads = xfs_globals.pwork_threads; #endif trace_xfs_pwork_init(mp, nr_threads, current->pid); pctl->wq = alloc_workqueue("%s-%d", WQ_UNBOUND | WQ_SYSFS | WQ_FREEZABLE, nr_threads, tag, current->pid); if (!pctl->wq) return -ENOMEM; pctl->work_fn = work_fn; pctl->error = 0; pctl->mp = mp; atomic_set(&pctl->nr_work, 0); init_waitqueue_head(&pctl->poll_wait); return 0; } /* Queue some parallel work. */ void xfs_pwork_queue( struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork) { INIT_WORK(&pwork->work, xfs_pwork_work); pwork->pctl = pctl; atomic_inc(&pctl->nr_work); queue_work(pctl->wq, &pwork->work); } /* Wait for the work to finish and tear down the control structure. */ int xfs_pwork_destroy( struct xfs_pwork_ctl *pctl) { destroy_workqueue(pctl->wq); pctl->wq = NULL; return pctl->error; } /* * Wait for the work to finish by polling completion status and touch the soft * lockup watchdog. This is for callers such as mount which hold locks. */ void xfs_pwork_poll( struct xfs_pwork_ctl *pctl) { while (wait_event_timeout(pctl->poll_wait, atomic_read(&pctl->nr_work) == 0, HZ) == 0) touch_softlockup_watchdog(); }
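The usage sequence described in the comment at the top of this file, condensed into a sketch. The demo_* structure and its payload are invented for illustration (linux/slab.h assumed); only xfs_pwork_init(), xfs_pwork_queue(), xfs_pwork_poll() and xfs_pwork_destroy() are the real API:

/* Hypothetical work item: embed struct xfs_pwork, as the API expects. */
struct demo_work {
	struct xfs_pwork	pwork;
	xfs_agnumber_t		agno;	/* illustrative per-item payload */
};

/* Work function: runs in process context; errors land in pctl->error. */
static int
demo_work_fn(
	struct xfs_mount	*mp,
	struct xfs_pwork	*pwork)
{
	struct demo_work	*dw = container_of(pwork, struct demo_work,
						   pwork);

	/* ... do the work for dw->agno here ... */
	kfree(dw);	/* the item must stay alive until the work has run */
	return 0;
}

static int
demo_run(
	struct xfs_mount	*mp)
{
	struct xfs_pwork_ctl	pctl;
	struct demo_work	*dw;
	int			error;

	error = xfs_pwork_init(mp, &pctl, demo_work_fn, "demo");
	if (error)
		return error;

	dw = kzalloc(sizeof(*dw), GFP_KERNEL);
	if (dw) {
		dw->agno = 0;
		xfs_pwork_queue(&pctl, &dw->pwork);
	}

	xfs_pwork_poll(&pctl);		/* optional: for callers holding locks */
	return xfs_pwork_destroy(&pctl);	/* waits, returns pctl.error */
}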
/* SPDX-License-Identifier: GPL-2.0-only */ /* * vivid-core.h - core datastructures * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #ifndef _VIVID_CORE_H_ #define _VIVID_CORE_H_ #include <linux/fb.h> #include <linux/workqueue.h> #include <media/cec.h> #include <media/videobuf2-v4l2.h> #include <media/v4l2-device.h> #include <media/v4l2-dev.h> #include <media/v4l2-ctrls.h> #include <media/tpg/v4l2-tpg.h> #include "vivid-rds-gen.h" #include "vivid-vbi-gen.h" #define dprintk(dev, level, fmt, arg...)
\ v4l2_dbg(level, vivid_debug, &dev->v4l2_dev, fmt, ## arg) /* The maximum number of inputs */ #define MAX_INPUTS 16 /* The maximum number of outputs */ #define MAX_OUTPUTS 16 /* The maximum number of video capture buffers */ #define MAX_VID_CAP_BUFFERS 64 /* The maximum up or down scaling factor is 4 */ #define MAX_ZOOM 4 /* The maximum image width/height is set to 4K DMT */ #define MAX_WIDTH 4096 #define MAX_HEIGHT 2160 /* The minimum image width/height */ #define MIN_WIDTH 16 #define MIN_HEIGHT MIN_WIDTH /* Pixel Array control divider */ #define PIXEL_ARRAY_DIV MIN_WIDTH /* The data_offset of plane 0 for the multiplanar formats */ #define PLANE0_DATA_OFFSET 128 /* The supported TV frequency range, in units of 1/16 MHz (44 MHz - 958 MHz) */ #define MIN_TV_FREQ (44U * 16U) #define MAX_TV_FREQ (958U * 16U) /* The number of samples returned in every SDR buffer */ #define SDR_CAP_SAMPLES_PER_BUF 0x4000 /* used by the threads to know when to resync internal counters */ #define JIFFIES_PER_DAY (3600U * 24U * HZ) #define JIFFIES_RESYNC (JIFFIES_PER_DAY * (0xf0000000U / JIFFIES_PER_DAY)) /* * Maximum number of HDMI inputs allowed by vivid. Due to limitations * of the physical address in the EDID, which is used by CEC, we stop * at 15 inputs and outputs. */ #define MAX_HDMI_INPUTS 15 #define MAX_HDMI_OUTPUTS 15 /* Maximum number of S-Video inputs allowed by vivid */ #define MAX_SVID_INPUTS 16 /* The maximum number of items in a menu control */ #define MAX_MENU_ITEMS BITS_PER_LONG_LONG /* Number of fixed menu items in the 'Connected To' menu controls */ #define FIXED_MENU_ITEMS 2 /* The maximum number of vivid devices */ #define VIVID_MAX_DEVS CONFIG_VIDEO_VIVID_MAX_DEVS extern const struct v4l2_rect vivid_min_rect; extern const struct v4l2_rect vivid_max_rect; extern unsigned vivid_debug; /* * NULL-terminated string array for the HDMI 'Connected To' menu controls * with the list of possible HDMI outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_hdmi_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all HDMI outputs that are in use */ extern u64 hdmi_to_output_menu_skip_mask; /* * Bitmask of which vivid instances need to update any connected * HDMI outputs. */ extern u64 hdmi_input_update_outputs_mask; /* * Spinlock for access to hdmi_to_output_menu_skip_mask and * hdmi_input_update_outputs_mask. */ extern spinlock_t hdmi_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the HDMI menu skip mask * changes. */ extern struct workqueue_struct *update_hdmi_ctrls_workqueue; /* * The HDMI menu control value (index in the menu list) maps to an HDMI * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_hdmi_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_hdmi_to_output_index[MAX_MENU_ITEMS]; /* * NULL-terminated string array for the S-Video 'Connected To' menu controls * with the list of possible S-Video outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_svid_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all S-Video outputs that are in use */ extern u64 svid_to_output_menu_skip_mask; /* Spinlock for access to svid_to_output_menu_skip_mask */ extern spinlock_t svid_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the S-Video menu skip mask * changes.
*/ extern struct workqueue_struct *update_svid_ctrls_workqueue; /* * The S-Video menu control value (index in the menu list) maps to an S-Video * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_svid_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_svid_to_output_index[MAX_MENU_ITEMS]; extern struct vivid_dev *vivid_devs[VIVID_MAX_DEVS]; extern unsigned int n_devs; struct vivid_fmt { u32 fourcc; /* v4l2 format id */ enum tgp_color_enc color_enc; bool can_do_overlay; u8 vdownsampling[TPG_MAX_PLANES]; u32 alpha_mask; u8 planes; u8 buffers; u32 data_offset[TPG_MAX_PLANES]; u32 bit_depth[TPG_MAX_PLANES]; }; extern struct vivid_fmt vivid_formats[]; /* buffer for one video frame */ struct vivid_buffer { /* common v4l buffer stuff -- must be first */ struct vb2_v4l2_buffer vb; struct list_head list; }; enum vivid_input { WEBCAM, TV, SVID, HDMI, }; enum vivid_signal_mode { CURRENT_DV_TIMINGS, CURRENT_STD = CURRENT_DV_TIMINGS, NO_SIGNAL, NO_LOCK, OUT_OF_RANGE, SELECTED_DV_TIMINGS, SELECTED_STD = SELECTED_DV_TIMINGS, CYCLE_DV_TIMINGS, CYCLE_STD = CYCLE_DV_TIMINGS, CUSTOM_DV_TIMINGS, }; enum vivid_colorspace { VIVID_CS_170M, VIVID_CS_709, VIVID_CS_SRGB, VIVID_CS_OPRGB, VIVID_CS_2020, VIVID_CS_DCI_P3, VIVID_CS_240M, VIVID_CS_SYS_M, VIVID_CS_SYS_BG, }; #define VIVID_INVALID_SIGNAL(mode) \ ((mode) == NO_SIGNAL || (mode) == NO_LOCK || (mode) == OUT_OF_RANGE) struct vivid_cec_xfer { struct cec_adapter *adap; u8 msg[CEC_MAX_MSG_SIZE]; u32 len; u32 sft; }; struct vivid_dev { u8 inst; struct v4l2_device v4l2_dev; #ifdef CONFIG_MEDIA_CONTROLLER struct media_device mdev; struct media_pad vid_cap_pad; struct media_pad vid_out_pad; struct media_pad vbi_cap_pad; struct media_pad vbi_out_pad; struct media_pad sdr_cap_pad; struct media_pad meta_cap_pad; struct media_pad meta_out_pad; struct media_pad touch_cap_pad; #endif struct v4l2_ctrl_handler ctrl_hdl_user_gen; struct v4l2_ctrl_handler ctrl_hdl_user_vid; struct v4l2_ctrl_handler ctrl_hdl_user_aud; struct v4l2_ctrl_handler ctrl_hdl_streaming; struct v4l2_ctrl_handler ctrl_hdl_sdtv_cap; struct v4l2_ctrl_handler ctrl_hdl_loop_cap; struct v4l2_ctrl_handler ctrl_hdl_fb; struct video_device vid_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_cap; struct video_device vid_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_out; struct video_device vbi_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_cap; struct video_device vbi_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_out; struct video_device radio_rx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_rx; struct video_device radio_tx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_tx; struct video_device sdr_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_sdr_cap; struct video_device meta_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_cap; struct video_device meta_out_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_out; struct video_device touch_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_touch_cap; spinlock_t slock; struct mutex mutex; struct work_struct update_hdmi_ctrl_work; struct work_struct update_svid_ctrl_work; /* capabilities */ u32 vid_cap_caps; u32 vid_out_caps; u32 vbi_cap_caps; u32 vbi_out_caps; u32 sdr_cap_caps; u32 radio_rx_caps; u32 radio_tx_caps; u32 meta_cap_caps; u32 meta_out_caps; u32 touch_cap_caps; /* supported features */ bool multiplanar; u8 num_inputs; u8 num_hdmi_inputs; u8 num_svid_inputs; u8 input_type[MAX_INPUTS]; u8 input_name_counter[MAX_INPUTS]; u8 
num_outputs; u8 num_hdmi_outputs; u8 output_type[MAX_OUTPUTS]; u8 output_name_counter[MAX_OUTPUTS]; bool has_audio_inputs; bool has_audio_outputs; bool has_vid_cap; bool has_vid_out; bool has_vbi_cap; bool has_raw_vbi_cap; bool has_sliced_vbi_cap; bool has_vbi_out; bool has_raw_vbi_out; bool has_sliced_vbi_out; bool has_radio_rx; bool has_radio_tx; bool has_sdr_cap; bool has_fb; bool has_meta_cap; bool has_meta_out; bool has_tv_tuner; bool has_touch_cap; /* Output index (0-MAX_OUTPUTS) to vivid instance of connected input */ struct vivid_dev *output_to_input_instance[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to input index (0-MAX_INPUTS) of connected input */ u8 output_to_input_index[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to HDMI or S-Video output index (0-MAX_HDMI/SVID_OUTPUTS) */ u8 output_to_iface_index[MAX_OUTPUTS]; /* ctrl_hdmi_to_output or ctrl_svid_to_output control value for each input */ s32 input_is_connected_to_output[MAX_INPUTS]; /* HDMI index (0-MAX_HDMI_OUTPUTS) to output index (0-MAX_OUTPUTS) */ u8 hdmi_index_to_output_index[MAX_HDMI_OUTPUTS]; /* HDMI index (0-MAX_HDMI_INPUTS) to input index (0-MAX_INPUTS) */ u8 hdmi_index_to_input_index[MAX_HDMI_INPUTS]; /* S-Video index (0-MAX_SVID_INPUTS) to input index (0-MAX_INPUTS) */ u8 svid_index_to_input_index[MAX_SVID_INPUTS]; /* controls */ struct v4l2_ctrl *brightness; struct v4l2_ctrl *contrast; struct v4l2_ctrl *saturation; struct v4l2_ctrl *hue; struct { /* autogain/gain cluster */ struct v4l2_ctrl *autogain; struct v4l2_ctrl *gain; }; struct v4l2_ctrl *volume; struct v4l2_ctrl *mute; struct v4l2_ctrl *alpha; struct v4l2_ctrl *button; struct v4l2_ctrl *boolean; struct v4l2_ctrl *int32; struct v4l2_ctrl *int64; struct v4l2_ctrl *menu; struct v4l2_ctrl *string; struct v4l2_ctrl *bitmask; struct v4l2_ctrl *int_menu; struct v4l2_ctrl *ro_int32; struct v4l2_ctrl *pixel_array; struct v4l2_ctrl *test_pattern; struct v4l2_ctrl *colorspace; struct v4l2_ctrl *rgb_range_cap; struct v4l2_ctrl *real_rgb_range_cap; struct { /* std_signal_mode/standard cluster */ struct v4l2_ctrl *ctrl_std_signal_mode; struct v4l2_ctrl *ctrl_standard; }; struct { /* dv_timings_signal_mode/timings cluster */ struct v4l2_ctrl *ctrl_dv_timings_signal_mode; struct v4l2_ctrl *ctrl_dv_timings; }; struct v4l2_ctrl *ctrl_has_crop_cap; struct v4l2_ctrl *ctrl_has_compose_cap; struct v4l2_ctrl *ctrl_has_scaler_cap; struct v4l2_ctrl *ctrl_has_crop_out; struct v4l2_ctrl *ctrl_has_compose_out; struct v4l2_ctrl *ctrl_has_scaler_out; struct v4l2_ctrl *ctrl_tx_mode; struct v4l2_ctrl *ctrl_tx_rgb_range; struct v4l2_ctrl *ctrl_tx_edid_present; struct v4l2_ctrl *ctrl_tx_hotplug; struct v4l2_ctrl *ctrl_tx_rxsense; struct v4l2_ctrl *ctrl_rx_power_present; struct v4l2_ctrl *radio_tx_rds_pi; struct v4l2_ctrl *radio_tx_rds_pty; struct v4l2_ctrl *radio_tx_rds_mono_stereo; struct v4l2_ctrl *radio_tx_rds_art_head; struct v4l2_ctrl *radio_tx_rds_compressed; struct v4l2_ctrl *radio_tx_rds_dyn_pty; struct v4l2_ctrl *radio_tx_rds_ta; struct v4l2_ctrl *radio_tx_rds_tp; struct v4l2_ctrl *radio_tx_rds_ms; struct v4l2_ctrl *radio_tx_rds_psname; struct v4l2_ctrl *radio_tx_rds_radiotext; struct v4l2_ctrl *radio_rx_rds_pty; struct v4l2_ctrl *radio_rx_rds_ta; struct v4l2_ctrl *radio_rx_rds_tp; struct v4l2_ctrl *radio_rx_rds_ms; struct v4l2_ctrl *radio_rx_rds_psname; struct v4l2_ctrl *radio_rx_rds_radiotext; struct v4l2_ctrl *ctrl_hdmi_to_output[MAX_HDMI_INPUTS]; char ctrl_hdmi_to_output_names[MAX_HDMI_INPUTS][32]; struct v4l2_ctrl *ctrl_svid_to_output[MAX_SVID_INPUTS]; char 
ctrl_svid_to_output_names[MAX_SVID_INPUTS][32]; unsigned input_brightness[MAX_INPUTS]; unsigned osd_mode; unsigned button_pressed; bool sensor_hflip; bool sensor_vflip; bool hflip; bool vflip; bool vbi_cap_interlaced; bool loop_video; bool reduced_fps; /* Framebuffer */ unsigned long video_pbase; void *video_vbase; u32 video_buffer_size; int display_width; int display_height; int display_byte_stride; int bits_per_pixel; int bytes_per_pixel; #ifdef CONFIG_VIDEO_VIVID_OSD struct fb_info fb_info; struct fb_var_screeninfo fb_defined; struct fb_fix_screeninfo fb_fix; #endif /* Error injection */ bool disconnect_error; bool queue_setup_error; bool buf_prepare_error; bool start_streaming_error; bool dqbuf_error; bool req_validate_error; bool seq_wrap; u64 time_wrap; u64 time_wrap_offset; unsigned perc_dropped_buffers; enum vivid_signal_mode std_signal_mode[MAX_INPUTS]; unsigned int query_std_last[MAX_INPUTS]; v4l2_std_id query_std[MAX_INPUTS]; enum tpg_video_aspect std_aspect_ratio[MAX_INPUTS]; enum vivid_signal_mode dv_timings_signal_mode[MAX_INPUTS]; char **query_dv_timings_qmenu; char *query_dv_timings_qmenu_strings; unsigned query_dv_timings_size; unsigned int query_dv_timings_last[MAX_INPUTS]; unsigned int query_dv_timings[MAX_INPUTS]; enum tpg_video_aspect dv_timings_aspect_ratio[MAX_INPUTS]; /* Input */ unsigned input; v4l2_std_id std_cap[MAX_INPUTS]; struct v4l2_dv_timings dv_timings_cap[MAX_INPUTS]; int dv_timings_cap_sel[MAX_INPUTS]; u32 service_set_cap; struct vivid_vbi_gen_data vbi_gen; u8 *edid; unsigned edid_blocks; unsigned edid_max_blocks; unsigned webcam_size_idx; unsigned webcam_ival_idx; unsigned tv_freq; unsigned tv_audmode; unsigned tv_field_cap; unsigned tv_audio_input; u32 power_present; /* Output */ unsigned output; v4l2_std_id std_out; struct v4l2_dv_timings dv_timings_out; u32 colorspace_out; u32 ycbcr_enc_out; u32 hsv_enc_out; u32 quantization_out; u32 xfer_func_out; u32 service_set_out; unsigned bytesperline_out[TPG_MAX_PLANES]; unsigned tv_field_out; unsigned tv_audio_output; bool vbi_out_have_wss; u8 vbi_out_wss[2]; bool vbi_out_have_cc[2]; u8 vbi_out_cc[2][2]; bool dvi_d_out; u8 *scaled_line; u8 *blended_line; unsigned cur_scaled_line; /* Output Overlay */ void *fb_vbase_out; bool overlay_out_enabled; int overlay_out_top, overlay_out_left; unsigned fbuf_out_flags; u32 chromakey_out; u8 global_alpha_out; /* video capture */ struct tpg_data tpg; unsigned ms_vid_cap; bool must_blank[MAX_VID_CAP_BUFFERS]; const struct vivid_fmt *fmt_cap; struct v4l2_fract timeperframe_vid_cap; enum v4l2_field field_cap; struct v4l2_rect src_rect; struct v4l2_rect fmt_cap_rect; struct v4l2_rect crop_cap; struct v4l2_rect compose_cap; struct v4l2_rect crop_bounds_cap; struct vb2_queue vb_vid_cap_q; struct list_head vid_cap_active; struct vb2_queue vb_vbi_cap_q; struct list_head vbi_cap_active; struct vb2_queue vb_meta_cap_q; struct list_head meta_cap_active; struct vb2_queue vb_touch_cap_q; struct list_head touch_cap_active; /* thread for generating video capture stream */ struct task_struct *kthread_vid_cap; unsigned long jiffies_vid_cap; u64 cap_stream_start; u64 cap_frame_period; u64 cap_frame_eof_offset; u32 cap_seq_offset; u32 cap_seq_count; bool cap_seq_resync; u32 vid_cap_seq_start; u32 vid_cap_seq_count; bool vid_cap_streaming; u32 vbi_cap_seq_start; u32 vbi_cap_seq_count; bool vbi_cap_streaming; u32 meta_cap_seq_start; u32 meta_cap_seq_count; bool meta_cap_streaming; /* Touch capture */ struct task_struct *kthread_touch_cap; unsigned long jiffies_touch_cap; u64 
touch_cap_stream_start; u32 touch_cap_seq_offset; bool touch_cap_seq_resync; u32 touch_cap_seq_start; u32 touch_cap_seq_count; u32 touch_cap_with_seq_wrap_count; bool touch_cap_streaming; struct v4l2_fract timeperframe_tch_cap; struct v4l2_pix_format tch_format; int tch_pat_random; /* video output */ const struct vivid_fmt *fmt_out; struct v4l2_fract timeperframe_vid_out; enum v4l2_field field_out; struct v4l2_rect sink_rect; struct v4l2_rect fmt_out_rect; struct v4l2_rect crop_out; struct v4l2_rect compose_out; struct v4l2_rect compose_bounds_out; struct vb2_queue vb_vid_out_q; struct list_head vid_out_active; struct vb2_queue vb_vbi_out_q; struct list_head vbi_out_active; struct vb2_queue vb_meta_out_q; struct list_head meta_out_active; /* video loop precalculated rectangles */ /* * Intersection between what the output side composes and the capture side * crops. I.e., what actually needs to be copied from the output buffer to * the capture buffer. */ struct v4l2_rect loop_vid_copy; /* The part of the output buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_out; /* The part of the capture buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_cap; /* * The intersection of the framebuffer, the overlay output window and * loop_vid_copy. I.e., the part of the framebuffer that actually should be * blended with the compose_out rectangle. This uses the framebuffer origin. */ struct v4l2_rect loop_fb_copy; /* The same as loop_fb_copy but with compose_out origin. */ struct v4l2_rect loop_vid_overlay; /* * The part of the capture buffer that (after scaling) corresponds * to loop_vid_overlay. */ struct v4l2_rect loop_vid_overlay_cap; /* thread for generating video output stream */ struct task_struct *kthread_vid_out; unsigned long jiffies_vid_out; u32 out_seq_offset; u32 out_seq_count; bool out_seq_resync; u32 vid_out_seq_start; u32 vid_out_seq_count; bool vid_out_streaming; u32 vbi_out_seq_start; u32 vbi_out_seq_count; bool vbi_out_streaming; bool stream_sliced_vbi_out; u32 meta_out_seq_start; u32 meta_out_seq_count; bool meta_out_streaming; /* SDR capture */ struct vb2_queue vb_sdr_cap_q; struct list_head sdr_cap_active; u32 sdr_pixelformat; /* v4l2 format id */ unsigned sdr_buffersize; unsigned sdr_adc_freq; unsigned sdr_fm_freq; unsigned sdr_fm_deviation; int sdr_fixp_src_phase; int sdr_fixp_mod_phase; bool tstamp_src_is_soe; bool has_crop_cap; bool has_compose_cap; bool has_scaler_cap; bool has_crop_out; bool has_compose_out; bool has_scaler_out; /* thread for generating SDR stream */ struct task_struct *kthread_sdr_cap; unsigned long jiffies_sdr_cap; u32 sdr_cap_seq_offset; u32 sdr_cap_seq_start; u32 sdr_cap_seq_count; u32 sdr_cap_with_seq_wrap_count; bool sdr_cap_seq_resync; /* RDS generator */ struct vivid_rds_gen rds_gen; /* Radio receiver */ unsigned radio_rx_freq; unsigned radio_rx_audmode; int radio_rx_sig_qual; unsigned radio_rx_hw_seek_mode; bool radio_rx_hw_seek_prog_lim; bool radio_rx_rds_controls; bool radio_rx_rds_enabled; unsigned radio_rx_rds_use_alternates; unsigned radio_rx_rds_last_block; struct v4l2_fh *radio_rx_rds_owner; /* Radio transmitter */ unsigned radio_tx_freq; unsigned radio_tx_subchans; bool radio_tx_rds_controls; unsigned radio_tx_rds_last_block; struct v4l2_fh *radio_tx_rds_owner; /* Shared between radio receiver and transmitter */ bool radio_rds_loop; ktime_t radio_rds_init_time; /* CEC */ struct cec_adapter *cec_rx_adap; struct cec_adapter *cec_tx_adap[MAX_HDMI_OUTPUTS]; struct task_struct 
*kthread_cec; wait_queue_head_t kthread_waitq_cec; struct vivid_cec_xfer xfers[MAX_OUTPUTS]; spinlock_t cec_xfers_slock; /* read and write cec messages */ u32 cec_sft; /* bus signal free time, in bit periods */ u8 last_initiator; /* CEC OSD String */ char osd[14]; unsigned long osd_jiffies; bool meta_pts; bool meta_scr; }; static inline bool vivid_is_webcam(const struct vivid_dev *dev) { return dev->input_type[dev->input] == WEBCAM; } static inline bool vivid_is_tv_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == TV; } static inline bool vivid_is_svid_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == SVID; } static inline bool vivid_is_hdmi_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == HDMI; } static inline bool vivid_is_sdtv_cap(const struct vivid_dev *dev) { return vivid_is_tv_cap(dev) || vivid_is_svid_cap(dev); } static inline bool vivid_is_svid_out(const struct vivid_dev *dev) { return dev->output_type[dev->output] == SVID; } static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev) { return dev->output_type[dev->output] == HDMI; } #endif
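A minimal sketch (not part of the driver) of how the vivid_is_* predicates above are meant to be combined: SDTV-class inputs (TV, S-Video) carry an analog standard signal mode, HDMI inputs carry a DV-timings signal mode, and webcams always have a signal. vivid_query_signal() is a hypothetical name; std_signal_mode, dv_timings_signal_mode and VIVID_INVALID_SIGNAL() are the fields and macro defined above.

/* Hypothetical helper, assuming only the declarations above. */
static int vivid_query_signal(const struct vivid_dev *dev)
{
	if (vivid_is_webcam(dev))
		return 0;		/* webcam inputs always report a signal */
	if (vivid_is_sdtv_cap(dev))	/* TV or S-Video capture */
		return VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input]) ?
			-ENOLINK : 0;
	if (vivid_is_hdmi_cap(dev))
		return VIVID_INVALID_SIGNAL(dev->dv_timings_signal_mode[dev->input]) ?
			-ENOLINK : 0;
	return -EINVAL;		/* unreachable for valid input types */
}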
// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/proc/net.c * * Copyright (C) 2007 * * Author: Eric Biederman <ebiederm@xmission.com> * * proc net directory handling functions */ #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/mount.h> #include <linux/nsproxy.h> #include <linux/uidgid.h> #include <net/net_namespace.h> #include <linux/seq_file.h> #include "internal.h" static inline struct net *PDE_NET(struct proc_dir_entry *pde) { return pde->parent->data; } static struct net *get_proc_net(const struct inode *inode) { return maybe_get_net(PDE_NET(PDE(inode))); } static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; struct seq_net_private *p; struct net *net; WARN_ON_ONCE(state_size < sizeof(*p)); if (file->f_mode & FMODE_WRITE && !PDE(inode)->write) return -EACCES; net = get_proc_net(inode); if (!net) return -ENXIO; p = __seq_open_private(file, PDE(inode)->seq_ops, state_size); if (!p) { put_net(net); return -ENOMEM; } #ifdef CONFIG_NET_NS p->net = net; netns_tracker_alloc(net, &p->ns_tracker, GFP_KERNEL); #endif return 0; } static void seq_file_net_put_net(struct seq_file *seq) { #ifdef CONFIG_NET_NS struct seq_net_private *priv = seq->private; put_net_track(priv->net, &priv->ns_tracker); #else put_net(&init_net); #endif } static int seq_release_net(struct inode *ino, struct file *f) { struct seq_file *seq = f->private_data; seq_file_net_put_net(seq); seq_release_private(ino, f); return 0; } static const struct proc_ops proc_net_seq_ops = { .proc_open = seq_open_net, .proc_read = seq_read, .proc_write = proc_simple_write, .proc_lseek = seq_lseek, .proc_release = seq_release_net, }; int bpf_iter_init_seq_net(void
*priv_data, struct bpf_iter_aux_info *aux) { #ifdef CONFIG_NET_NS struct seq_net_private *p = priv_data; p->net = get_net_track(current->nsproxy->net_ns, &p->ns_tracker, GFP_KERNEL); #endif return 0; } void bpf_iter_fini_seq_net(void *priv_data) { #ifdef CONFIG_NET_NS struct seq_net_private *p = priv_data; put_net_track(p->net, &p->ns_tracker); #endif } struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, unsigned int state_size, void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; pde_force_lookup(p); p->proc_ops = &proc_net_seq_ops; p->seq_ops = ops; p->state_size = state_size; return proc_register(parent, p); } EXPORT_SYMBOL_GPL(proc_create_net_data); /** * proc_create_net_data_write - Create a writable net_ns-specific proc file * @name: The name of the file. * @mode: The file's access mode. * @parent: The parent directory in which to create. * @ops: The seq_file ops with which to read the file. * @write: The write method with which to 'modify' the file. * @state_size: The size of the per-file private state to allocate. * @data: Data for retrieval by pde_data(). * * Create a network namespaced proc file in the @parent directory with the * specified @name and @mode that allows reading of a file that displays a * series of elements and also provides for the file accepting writes that have * some arbitrary effect. * * The functions in the @ops table are used to iterate over items to be * presented and extract the readable content using the seq_file interface. * * The @write function is called with the data copied into a kernel space * scratch buffer and has a NUL appended for convenience. The buffer may be * modified by the @write function. @write should return 0 on success. * * The @data value is accessible from the @show and @write functions by calling * pde_data() on the file inode. The network namespace must be accessed by * calling seq_file_net() on the seq_file struct. 
*/ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, proc_write_t write, unsigned int state_size, void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; pde_force_lookup(p); p->proc_ops = &proc_net_seq_ops; p->seq_ops = ops; p->state_size = state_size; p->write = write; return proc_register(parent, p); } EXPORT_SYMBOL_GPL(proc_create_net_data_write); static int single_open_net(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); struct net *net; int err; net = get_proc_net(inode); if (!net) return -ENXIO; err = single_open(file, de->single_show, net); if (err) put_net(net); return err; } static int single_release_net(struct inode *ino, struct file *f) { struct seq_file *seq = f->private_data; put_net(seq->private); return single_release(ino, f); } static const struct proc_ops proc_net_single_ops = { .proc_open = single_open_net, .proc_read = seq_read, .proc_write = proc_simple_write, .proc_lseek = seq_lseek, .proc_release = single_release_net, }; struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, struct proc_dir_entry *parent, int (*show)(struct seq_file *, void *), void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; pde_force_lookup(p); p->proc_ops = &proc_net_single_ops; p->single_show = show; return proc_register(parent, p); } EXPORT_SYMBOL_GPL(proc_create_net_single); /** * proc_create_net_single_write - Create a writable net_ns-specific proc file * @name: The name of the file. * @mode: The file's access mode. * @parent: The parent directory in which to create. * @show: The seqfile show method with which to read the file. * @write: The write method with which to 'modify' the file. * @data: Data for retrieval by pde_data(). * * Create a network-namespaced proc file in the @parent directory with the * specified @name and @mode that allows reading of a file that displays a * single element rather than a series and also provides for the file accepting * writes that have some arbitrary effect. * * The @show function is called to extract the readable content via the * seq_file interface. * * The @write function is called with the data copied into a kernel space * scratch buffer and has a NUL appended for convenience. The buffer may be * modified by the @write function. @write should return 0 on success. * * The @data value is accessible from the @show and @write functions by calling * pde_data() on the file inode. The network namespace must be accessed by * calling seq_file_single_net() on the seq_file struct. 
*/ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode, struct proc_dir_entry *parent, int (*show)(struct seq_file *, void *), proc_write_t write, void *data) { struct proc_dir_entry *p; p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; pde_force_lookup(p); p->proc_ops = &proc_net_single_ops; p->single_show = show; p->write = write; return proc_register(parent, p); } EXPORT_SYMBOL_GPL(proc_create_net_single_write); static struct net *get_proc_task_net(struct inode *dir) { struct task_struct *task; struct nsproxy *ns; struct net *net = NULL; rcu_read_lock(); task = pid_task(proc_pid(dir), PIDTYPE_PID); if (task != NULL) { task_lock(task); ns = task->nsproxy; if (ns != NULL) net = get_net(ns->net_ns); task_unlock(task); } rcu_read_unlock(); return net; } static struct dentry *proc_tgid_net_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct dentry *de; struct net *net; de = ERR_PTR(-ENOENT); net = get_proc_task_net(dir); if (net != NULL) { de = proc_lookup_de(dir, dentry, net->proc_net); put_net(net); } return de; } static int proc_tgid_net_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct net *net; net = get_proc_task_net(inode); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (net != NULL) { stat->nlink = net->proc_net->nlink; put_net(net); } return 0; } const struct inode_operations proc_net_inode_operations = { .lookup = proc_tgid_net_lookup, .getattr = proc_tgid_net_getattr, .setattr = proc_setattr, }; static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) { int ret; struct net *net; ret = -EINVAL; net = get_proc_task_net(file_inode(file)); if (net != NULL) { ret = proc_readdir_de(file, ctx, net->proc_net); put_net(net); } return ret; } const struct file_operations proc_net_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = proc_tgid_net_readdir, }; static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; kuid_t uid; kgid_t gid; int err; /* * This PDE acts only as an anchor for /proc/${pid}/net hierarchy. * Corresponding inode (PDE(inode) == net->proc_net) is never * instantiated therefore blanket zeroing is fine. * net->proc_net_stat inode is instantiated normally. */ err = -ENOMEM; netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!netd) goto out; netd->subdir = RB_ROOT; netd->data = net; netd->nlink = 2; netd->namelen = 3; netd->parent = &proc_root; netd->name = netd->inline_name; memcpy(netd->name, "net", 4); uid = make_kuid(net->user_ns, 0); if (!uid_valid(uid)) uid = netd->uid; gid = make_kgid(net->user_ns, 0); if (!gid_valid(gid)) gid = netd->gid; proc_set_user(netd, uid, gid); /* Seed dentry revalidation for /proc/${pid}/net */ pde_force_lookup(netd); err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) goto free_net; net->proc_net = netd; net->proc_net_stat = net_statd; return 0; free_net: pde_free(netd); out: return err; } static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); pde_free(net->proc_net); } static struct pernet_operations __net_initdata proc_net_ns_ops = { .init = proc_net_ns_init, .exit = proc_net_ns_exit, }; int __init proc_net_init(void) { proc_symlink("net", NULL, "self/net"); return register_pernet_subsys(&proc_net_ns_ops); }
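A minimal usage sketch, not part of this file: registering a read/write single-show file from a pernet init hook with the helper above. foo_show/foo_write/foo_net_init and the "foo_inum" file name are hypothetical; seq_file_single_net() is the accessor the kerneldoc above prescribes for single-show files.

static int foo_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_single_net(seq);

	/* print the inode number identifying this network namespace */
	seq_printf(seq, "%u\n", net->ns.inum);
	return 0;
}

static int foo_write(struct file *file, char *buf, size_t size)
{
	/* buf is a NUL-terminated kernel copy of the user data (see above) */
	pr_debug("foo: got %zu bytes: %s\n", size, buf);
	return 0;	/* 0 on success, as the kerneldoc above requires */
}

static __net_init int foo_net_init(struct net *net)
{
	if (!proc_create_net_single_write("foo_inum", 0644, net->proc_net,
					  foo_show, foo_write, NULL))
		return -ENOMEM;
	return 0;
}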
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_health.h" static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, struct xfs_dir2_data_free **bf_ent); struct xfs_dir2_data_free * xfs_dir2_data_bestfree_p( struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) { if (xfs_has_crc(mp)) return ((struct xfs_dir3_data_hdr *)hdr)->best_free; return hdr->bestfree; } /* * Pointer to an entry's tag word. */ __be16 * xfs_dir2_data_entry_tag_p( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep) { return (__be16 *)((char *)dep + xfs_dir2_data_entsize(mp, dep->namelen) - sizeof(__be16)); } uint8_t xfs_dir2_data_get_ftype( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep) { if (xfs_has_ftype(mp)) { uint8_t ftype = dep->name[dep->namelen]; if (likely(ftype < XFS_DIR3_FT_MAX)) return ftype; } return XFS_DIR3_FT_UNKNOWN; } void xfs_dir2_data_put_ftype( struct xfs_mount *mp, struct xfs_dir2_data_entry *dep, uint8_t ftype) { ASSERT(ftype < XFS_DIR3_FT_MAX); ASSERT(dep->namelen != 0); if (xfs_has_ftype(mp)) dep->name[dep->namelen] = ftype; } /* * The number of leaf entries is limited by the size of the block and the amount * of space used by the data entries.
We don't know how much space is used by * the data entries yet, so just ensure that the count falls somewhere inside * the block right now. */ static inline unsigned int xfs_dir2_data_max_leaf_entries( struct xfs_da_geometry *geo) { return (geo->blksize - sizeof(struct xfs_dir2_block_tail) - geo->data_entry_offset) / sizeof(struct xfs_dir2_leaf_entry); } /* * Check the consistency of the data block. * The input can also be a block-format directory. * Return NULL if the buffer is good, otherwise the address of the error. */ xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ { xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ xfs_dir2_data_free_t *bf; /* bestfree table */ xfs_dir2_block_tail_t *btp=NULL; /* block tail */ int count; /* count of entries found */ xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree entry */ int freeseen; /* mask of bestfrees seen */ xfs_dahash_t hash; /* hash of current name */ int i; /* leaf index */ int lastfree; /* last entry was unused */ xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ struct xfs_mount *mp = bp->b_mount; int stale; /* count of stale leaves */ struct xfs_name name; unsigned int offset; unsigned int end; struct xfs_da_geometry *geo = mp->m_dir_geo; /* * If this isn't a directory, something is seriously wrong. Bail out. */ if (dp && !S_ISDIR(VFS_I(dp)->i_mode)) return __this_address; hdr = bp->b_addr; offset = geo->data_entry_offset; switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); if (be32_to_cpu(btp->count) >= xfs_dir2_data_max_leaf_entries(geo)) return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): break; default: return __this_address; } end = xfs_dir3_data_end_offset(geo, hdr); if (!end) return __this_address; /* * Account for zero bestfree entries. */ bf = xfs_dir2_data_bestfree_p(mp, hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { if (bf[0].offset) return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { if (bf[1].offset) return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { if (bf[2].offset) return __this_address; freeseen |= 1 << 2; } if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) return __this_address; if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) return __this_address; /* * Loop over the data/unused entries. */ while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; unsigned int reclen; /* * Are the remaining bytes large enough to hold an * unused entry? */ if (offset > end - xfs_dir2_data_unusedsize(1)) return __this_address; /* * If it's unused, look for the space in the bestfree table. * If we find it, account for that, else make sure it * doesn't need to be there. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { xfs_failaddr_t fa; reclen = xfs_dir2_data_unusedsize( be16_to_cpu(dup->length)); if (lastfree != 0) return __this_address; if (be16_to_cpu(dup->length) != reclen) return __this_address; if (offset + reclen > end) return __this_address; if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != offset) return __this_address; fa = xfs_dir2_data_freefind_verify(hdr, bf, dup, &dfp); if (fa) return fa; if (dfp) { i = (int)(dfp - bf); if ((freeseen & (1 << i)) != 0) return __this_address; freeseen |= 1 << i; } else { if (be16_to_cpu(dup->length) > be16_to_cpu(bf[2].length)) return __this_address; } offset += reclen; lastfree = 1; continue; } /* * This is not an unused entry. Are the remaining bytes * large enough for a dirent with a single-byte name? */ if (offset > end - xfs_dir2_data_entsize(mp, 1)) return __this_address; /* * It's a real entry. Validate the fields. * If this is a block directory then make sure it's * in the leaf section of the block. * The linear search is crude but this is DEBUG code. */ if (dep->namelen == 0) return __this_address; reclen = xfs_dir2_data_entsize(mp, dep->namelen); if (offset + reclen > end) return __this_address; if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) return __this_address; if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) return __this_address; if (xfs_dir2_data_get_ftype(mp, dep) >= XFS_DIR3_FT_MAX) return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)hdr)); name.name = dep->name; name.len = dep->namelen; hash = xfs_dir2_hashname(mp, &name); for (i = 0; i < be32_to_cpu(btp->count); i++) { if (be32_to_cpu(lep[i].address) == addr && be32_to_cpu(lep[i].hashval) == hash) break; } if (i >= be32_to_cpu(btp->count)) return __this_address; } offset += reclen; } /* * Need to have seen all the entries and all the bestfree slots. */ if (freeseen != 7) return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; if (i > 0 && be32_to_cpu(lep[i].hashval) < be32_to_cpu(lep[i - 1].hashval)) return __this_address; } if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) return __this_address; if (stale != be32_to_cpu(btp->stale)) return __this_address; } return NULL; } #ifdef DEBUG void xfs_dir3_data_check( struct xfs_inode *dp, struct xfs_buf *bp) { xfs_failaddr_t fa; fa = __xfs_dir3_data_check(dp, bp); if (!fa) return; xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, bp->b_addr, BBTOB(bp->b_length), __FILE__, __LINE__, fa); ASSERT(0); } #endif static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) return __this_address; if (xfs_has_crc(mp)) { if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address; } return __xfs_dir3_data_check(NULL, bp); } /* * Readahead of the first block of the directory when it is opened is completely * oblivious to the format of the directory. 
Hence we can either get a block * format buffer or a data format buffer on readahead. */ static void xfs_dir3_data_reada_verify( struct xfs_buf *bp) { struct xfs_dir2_data_hdr *hdr = bp->b_addr; switch (hdr->magic) { case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): bp->b_ops = &xfs_dir3_block_buf_ops; bp->b_ops->verify_read(bp); return; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): bp->b_ops = &xfs_dir3_data_buf_ops; bp->b_ops->verify_read(bp); return; default: xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } static void xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_has_crc(mp) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_dir3_data_verify(bp); if (fa) xfs_verifier_error(bp, -EFSCORRUPTED, fa); } } static void xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; fa = xfs_dir3_data_verify(bp); if (fa) { xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } if (!xfs_has_crc(mp)) return; if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC), cpu_to_be32(XFS_DIR3_DATA_MAGIC) }, .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { .name = "xfs_dir3_data_reada", .magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC), cpu_to_be32(XFS_DIR3_DATA_MAGIC) }, .verify_read = xfs_dir3_data_reada_verify, .verify_write = xfs_dir3_data_write_verify, }; xfs_failaddr_t xfs_dir3_data_header_check( struct xfs_buf *bp, xfs_ino_t owner) { struct xfs_mount *mp = bp->b_mount; if (xfs_has_crc(mp)) { struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; if (hdr3->hdr.magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) return __this_address; if (be64_to_cpu(hdr3->hdr.owner) != owner) return __this_address; } return NULL; } int xfs_dir3_data_read( struct xfs_trans *tp, struct xfs_inode *dp, xfs_ino_t owner, xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp) { xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); if (err || !*bpp) return err; /* Check things that we can't do in the verifier. */ fa = xfs_dir3_data_header_check(*bpp, owner); if (fa) { __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); *bpp = NULL; xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); return -EFSCORRUPTED; } xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } int xfs_dir3_data_readahead( struct xfs_inode *dp, xfs_dablk_t bno, unsigned int flags) { return xfs_da_reada_buf(dp, bno, flags, XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops); } /* * Find the bestfree entry that exactly coincides with unused directory space * or a verifier error because the bestfree data are bad. 
*/ static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, struct xfs_dir2_data_free **bf_ent) { struct xfs_dir2_data_free *dfp; xfs_dir2_data_aoff_t off; bool matched = false; bool seenzero = false; *bf_ent = NULL; off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); /* * Validate some consistency in the bestfree table. * Check order, non-overlapping entries, and if we find the * one we're looking for it has to be exact. */ for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) { if (dfp->length) return __this_address; seenzero = true; continue; } if (seenzero) return __this_address; if (be16_to_cpu(dfp->offset) == off) { matched = true; if (dfp->length != dup->length) return __this_address; } else if (be16_to_cpu(dfp->offset) > off) { if (off + be16_to_cpu(dup->length) > be16_to_cpu(dfp->offset)) return __this_address; } else { if (be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) > off) return __this_address; } if (!matched && be16_to_cpu(dfp->length) < be16_to_cpu(dup->length)) return __this_address; if (dfp > &bf[0] && be16_to_cpu(dfp[-1].length) < be16_to_cpu(dfp[0].length)) return __this_address; } /* Looks ok so far; now try to match up with a bestfree entry. */ *bf_ent = xfs_dir2_data_freefind(hdr, bf, dup); return NULL; } /* * Given a data block and an unused entry from that block, * return the bestfree entry if any that corresponds to it. */ xfs_dir2_data_free_t * xfs_dir2_data_freefind( struct xfs_dir2_data_hdr *hdr, /* data block header */ struct xfs_dir2_data_free *bf, /* bestfree table pointer */ struct xfs_dir2_data_unused *dup) /* unused space */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_aoff_t off; /* offset value needed */ off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); /* * If this is smaller than the smallest bestfree entry, * it can't be there since they're sorted. */ if (be16_to_cpu(dup->length) < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length)) return NULL; /* * Look at the three bestfree entries for our guy. */ for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) { if (!dfp->offset) return NULL; if (be16_to_cpu(dfp->offset) == off) return dfp; } /* * Didn't find it. This only happens if there are duplicate lengths. */ return NULL; } /* * Insert an unused-space entry into the bestfree table. */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( struct xfs_dir2_data_hdr *hdr, /* data block pointer */ struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ struct xfs_dir2_data_unused *dup, /* unused space */ int *loghead) /* log the data header (out) */ { xfs_dir2_data_free_t new; /* new bestfree entry */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); new.length = dup->length; new.offset = cpu_to_be16((char *)dup - (char *)hdr); /* * Insert at position 0, 1, or 2; or not at all. */ if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) { dfp[2] = dfp[1]; dfp[1] = dfp[0]; dfp[0] = new; *loghead = 1; return &dfp[0]; } if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) { dfp[2] = dfp[1]; dfp[1] = new; *loghead = 1; return &dfp[1]; } if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) { dfp[2] = new; *loghead = 1; return &dfp[2]; } return NULL; } /* * Remove a bestfree entry from the table. 
*/ STATIC void xfs_dir2_data_freeremove( struct xfs_dir2_data_hdr *hdr, /* data block header */ struct xfs_dir2_data_free *bf, /* bestfree table pointer */ struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * It's the first entry, slide the next 2 up. */ if (dfp == &bf[0]) { bf[0] = bf[1]; bf[1] = bf[2]; } /* * It's the second entry, slide the 3rd entry up. */ else if (dfp == &bf[1]) bf[1] = bf[2]; /* * Must be the last entry. */ else ASSERT(dfp == &bf[2]); /* * Clear the 3rd entry, must be zero now. */ bf[2].length = 0; bf[2].offset = 0; *loghead = 1; } /* * Given a data block, reconstruct its bestfree map. */ void xfs_dir2_data_freescan( struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr, int *loghead) { struct xfs_da_geometry *geo = mp->m_dir_geo; struct xfs_dir2_data_free *bf = xfs_dir2_data_bestfree_p(mp, hdr); void *addr = hdr; unsigned int offset = geo->data_entry_offset; unsigned int end; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * Start by clearing the table. */ memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; end = xfs_dir3_data_end_offset(geo, addr); while (offset < end) { struct xfs_dir2_data_unused *dup = addr + offset; struct xfs_dir2_data_entry *dep = addr + offset; /* * If it's a free entry, insert it. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT(offset == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); offset += be16_to_cpu(dup->length); continue; } /* * For active entries, check their tags and skip them. */ ASSERT(offset == be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep))); offset += xfs_dir2_data_entsize(mp, dep->namelen); } } /* * Initialize a data block at the given block number in the directory. * Give back the buffer for the created block. */ int /* error */ xfs_dir3_data_init( struct xfs_da_args *args, /* directory operation args */ xfs_dir2_db_t blkno, /* logical dir block number */ struct xfs_buf **bpp) /* output block buffer */ { struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_da_geometry *geo = args->geo; struct xfs_buf *bp; struct xfs_dir2_data_hdr *hdr; struct xfs_dir2_data_unused *dup; struct xfs_dir2_data_free *bf; int error; int i; /* * Get the buffer set up for the block. */ error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), &bp, XFS_DATA_FORK); if (error) return error; bp->b_ops = &xfs_dir3_data_buf_ops; xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF); /* * Initialize the header. 
*/ hdr = bp->b_addr; if (xfs_has_crc(mp)) { struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; memset(hdr3, 0, sizeof(*hdr3)); hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp)); hdr3->owner = cpu_to_be64(args->owner); uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid); } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); bf = xfs_dir2_data_bestfree_p(mp, hdr); bf[0].offset = cpu_to_be16(geo->data_entry_offset); bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { bf[i].length = 0; bf[i].offset = 0; } /* * Set up an unused entry for the block's body. */ dup = bp->b_addr + geo->data_entry_offset; dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); dup->length = bf[0].length; *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ xfs_dir2_data_log_header(args, bp); xfs_dir2_data_log_unused(args, bp, dup); *bpp = bp; return 0; } /* * Log an active data entry from the block. */ void xfs_dir2_data_log_entry( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), (uint)((char *)(xfs_dir2_data_entry_tag_p(mp, dep) + 1) - (char *)hdr - 1)); } /* * Log a data block header. */ void xfs_dir2_data_log_header( struct xfs_da_args *args, struct xfs_buf *bp) { #ifdef DEBUG struct xfs_dir2_data_hdr *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); #endif xfs_trans_log_buf(args->trans, bp, 0, args->geo->data_entry_offset - 1); } /* * Log a data unused entry. */ void xfs_dir2_data_log_unused( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup) /* data unused pointer */ { xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); /* * Log the first part of the unused entry. */ xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr), (uint)((char *)&dup->length + sizeof(dup->length) - 1 - (char *)hdr)); /* * Log the end (tag) of the unused entry. */ xfs_trans_log_buf(args->trans, bp, (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr), (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr + sizeof(xfs_dir2_data_off_t) - 1)); } /* * Make a byte range in the data block unused. * Its current contents are unimportant. 
*/ void xfs_dir2_data_make_free( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ int *needlogp, /* out: log header */ int *needscanp) /* out: regen bestfree */ { xfs_dir2_data_hdr_t *hdr; /* data block pointer */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ int needscan; /* need to regen bestfree */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *postdup; /* unused entry after us */ xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ unsigned int end; struct xfs_dir2_data_free *bf; hdr = bp->b_addr; /* * Figure out where the end of the data area is. */ end = xfs_dir3_data_end_offset(args->geo, hdr); ASSERT(end != 0); /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ if (offset > args->geo->data_entry_offset) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) prevdup = NULL; } else prevdup = NULL; /* * If this isn't the end of the block, see if the entry after * us is free. */ if (offset + len < end) { postdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) postdup = NULL; } else postdup = NULL; ASSERT(*needscanp == 0); needscan = 0; /* * Previous and following entries are both free, * merge everything into a single free entry. */ bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ /* * See if prevdup and/or postdup are in bestfree table. */ dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise * since the third bestfree is there, there might be more * entries. */ needscan = (bf[2].length != 0); /* * Fix up the new big freespace. */ be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, prevdup); if (!needscan) { /* * Has to be the case that entries 0 and 1 are * dfp and dfp2 (don't know which is which), and * entry 2 is empty. * Remove entry 1 first then entry 0. */ ASSERT(dfp && dfp2); if (dfp == &bf[1]) { dfp = &bf[0]; ASSERT(dfp2 == dfp); dfp2 = &bf[1]; } xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); /* * Now insert the new entry. */ dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); ASSERT(dfp == &bf[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); ASSERT(!dfp[2].length); } } /* * The entry before us is free, merge with it. */ else if (prevdup) { dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, prevdup); /* * If the previous entry was in the table, the new entry * is longer, so it will be in the table too. Remove * the old one and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. 
*/ else { needscan = be16_to_cpu(prevdup->length) > be16_to_cpu(bf[2].length); } } /* * The following entry is free, merge with it. */ else if (postdup) { dfp = xfs_dir2_data_freefind(hdr, bf, postdup); newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If the following entry was in the table, the new entry * is longer, so it will be in the table too. Remove * the old one and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. */ else { needscan = be16_to_cpu(newdup->length) > be16_to_cpu(bf[2].length); } } /* * Neither neighbor is free. Make a new entry. */ else { newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); } *needscanp = needscan; } /* Check our free data for obvious signs of corruption. */ static inline xfs_failaddr_t xfs_dir2_data_check_free( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len) { if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) && hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) return __this_address; if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG) return __this_address; if (offset < (char *)dup - (char *)hdr) return __this_address; if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr) return __this_address; if ((char *)dup - (char *)hdr != be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))) return __this_address; return NULL; } /* Sanity-check a new bestfree entry. */ static inline xfs_failaddr_t xfs_dir2_data_check_new_free( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *dfp, struct xfs_dir2_data_unused *newdup) { if (dfp == NULL) return __this_address; if (dfp->length != newdup->length) return __this_address; if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr) return __this_address; return NULL; } /* * Take a byte range out of an existing unused space and make it un-free. */ int xfs_dir2_data_use_free( struct xfs_da_args *args, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ xfs_dir2_data_aoff_t len, /* length to use */ int *needlogp, /* out: need to log header */ int *needscanp) /* out: need regen bestfree */ { xfs_dir2_data_hdr_t *hdr; /* data block header */ xfs_dir2_data_free_t *dfp; /* bestfree pointer */ xfs_dir2_data_unused_t *newdup; /* new unused entry */ xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ struct xfs_dir2_data_free *bf; xfs_failaddr_t fa; int matchback; /* matches end of freespace */ int matchfront; /* matches start of freespace */ int needscan; /* need to regen bestfree */ int oldlen; /* old unused entry's length */ hdr = bp->b_addr; fa = xfs_dir2_data_check_free(hdr, dup, offset, len); if (fa) goto corrupt; /* * Look up the entry in the bestfree table. 
*/ oldlen = be16_to_cpu(dup->length); bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr); dfp = xfs_dir2_data_freefind(hdr, bf, dup); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* * Check for alignment with front and back of the entry. */ matchfront = (char *)dup - (char *)hdr == offset; matchback = (char *)dup + oldlen - (char *)hdr == offset + len; ASSERT(*needscanp == 0); needscan = 0; /* * If we matched it exactly we just need to get rid of it from * the bestfree table. */ if (matchfront && matchback) { if (dfp) { needscan = (bf[2].offset != 0); if (!needscan) xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); } } /* * We match the first part of the entry. * Make a new entry with the remaining freespace. */ else if (matchfront) { newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(oldlen - len); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); if (fa) goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ needscan = dfp == &bf[2]; } } /* * We match the last part of the entry. * Trim the allocated space off the tail of the entry. */ else if (matchback) { newdup = dup; newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); /* * If it was in the table, remove it and add the new one. */ if (dfp) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup); if (fa) goto corrupt; /* * If we got inserted at the last slot, * that means we don't know if there was a better * choice for the last slot, or not. Rescan. */ needscan = dfp == &bf[2]; } } /* * Poking out the middle of an entry. * Make two new entries. */ else { newdup = dup; newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup); *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup); newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len); newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); *xfs_dir2_data_unused_tag_p(newdup2) = cpu_to_be16((char *)newdup2 - (char *)hdr); xfs_dir2_data_log_unused(args, bp, newdup2); /* * If the old entry was in the table, we need to scan * if the 3rd entry was valid, since these entries * are smaller than the old one. * If we don't need to scan that means there were 1 or 2 * entries in the table, and removing the old and adding * the 2 new will work. 
*/ if (dfp) { needscan = (bf[2].length != 0); if (!needscan) { xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); xfs_dir2_data_freeinsert(hdr, bf, newdup2, needlogp); } } } *needscanp = needscan; return 0; corrupt: xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, hdr, sizeof(*hdr), __FILE__, __LINE__, fa); xfs_da_mark_sick(args); return -EFSCORRUPTED; } /* Find the end of the entry data in a data/block format dir block. */ unsigned int xfs_dir3_data_end_offset( struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) { void *p; switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): p = xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); return p - (void *)hdr; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): return geo->blksize; default: return 0; } }
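To make the bestfree invariant concrete, here is a standalone userspace model (plain C, not XFS code) of the slot discipline that xfs_dir2_data_freeinsert() and xfs_dir2_data_freeremove() maintain above: the table holds the three longest free regions, sorted by descending length, with unused slots zeroed; anything shorter than all three is simply not tracked, which is why callers sometimes need a full rescan.

#include <stdio.h>
#include <string.h>

struct bestfree { unsigned short length, offset; };

/* Mirror of the xfs_dir2_data_freeinsert() slot logic, host-endian. */
static void model_freeinsert(struct bestfree bf[3], unsigned short len,
			     unsigned short off)
{
	struct bestfree new = { len, off };

	if (len > bf[0].length) {
		bf[2] = bf[1]; bf[1] = bf[0]; bf[0] = new;
	} else if (len > bf[1].length) {
		bf[2] = bf[1]; bf[1] = new;
	} else if (len > bf[2].length) {
		bf[2] = new;
	} /* else: smaller than all three tracked regions, dropped */
}

int main(void)
{
	struct bestfree bf[3];
	unsigned short lens[] = { 16, 48, 8, 32 };

	memset(bf, 0, sizeof(bf));	/* same zeroed start as freescan */
	for (unsigned int i = 0; i < 4; i++)
		model_freeinsert(bf, lens[i], (unsigned short)(64 * i));
	/* Prints lengths 48, 32, 16: the 8-byte region fell out. */
	for (int i = 0; i < 3; i++)
		printf("bf[%d] = { len %u, off %u }\n",
		       i, bf[i].length, bf[i].offset);
	return 0;
}

The dropped 8-byte region is the analogue of a free region that xfs_dir2_data_freefind() can legitimately fail to locate in the table.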
// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *revoke_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}
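/*
 * Added illustration (standalone userspace sketch, not kernel code): f2fs
 * bitmaps number bits MSB-first within each byte, which is why the generic
 * __ffs()/find_next_bit() helpers cannot be used directly. Bit i lives in
 * byte i / 8 under mask 0x80 >> (i % 8):
 */
#if 0	/* example only, kept out of the build */
#include <stdio.h>

static void set_bit_msb_first(unsigned int nr, unsigned char *addr)
{
	addr[nr / 8] |= 0x80 >> (nr % 8);	/* mirrors the f2fs_set_bit layout */
}

int main(void)
{
	unsigned char bitmap[2] = { 0, 0 };

	set_bit_msb_first(0, bitmap);	/* byte 0 becomes 1000 0000 */
	set_bit_msb_first(7, bitmap);	/* byte 0 becomes 1000 0001 */
	printf("%02x %02x\n", bitmap[0], bitmap[1]);	/* prints "81 00" */
	return 0;
}
#endif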
/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of the unsigned long width.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;

		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;

		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
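/*
 * Added illustration (standalone sketch with hypothetical numbers): the SSR
 * trigger above is a plain headroom comparison. With 10 dirty node sections,
 * 4 dirty dentry sections, 1 dirty imeta section, min_ssr_sections = 2 and
 * reserved_sections = 3, SSR is needed once free sections drop to
 * 10 + 2*4 + 1 + 2 + 3 = 24 or fewer:
 */
#if 0	/* example only, kept out of the build */
#include <stdbool.h>

static bool need_ssr(int free_secs, int node_secs, int dent_secs,
		     int imeta_secs, int min_ssr_secs, int reserved_secs)
{
	return free_secs <= node_secs + 2 * dent_secs + imeta_secs +
			    min_ssr_secs + reserved_secs;
}
#endif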
void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (!f2fs_is_atomic_file(inode))
		return;

	if (clean)
		truncate_inode_pages_final(inode->i_mapping);

	release_atomic_write_cnt(inode);
	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
	clear_inode_flag(inode, FI_ATOMIC_REPLACE);
	clear_inode_flag(inode, FI_ATOMIC_FILE);

	if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
		clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
		/*
		 * The vfs inode stays clean during commit, but the f2fs inode
		 * doesn't. So clear the dirty state after commit and let
		 * f2fs_mark_inode_dirty_sync ensure a consistent dirty state.
		 */
		f2fs_inode_synced(inode);
		f2fs_mark_inode_dirty_sync(inode, true);
	}

	stat_dec_atomic_inode(inode);

	F2FS_I(inode)->atomic_write_task = NULL;

	if (clean) {
		f2fs_i_size_write(inode, fi->original_i_size);
		fi->original_i_size = 0;
	}
	/* avoid stale dirty inode during eviction */
	sync_inode_metadata(inode, 0);
}

static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
			block_t new_addr, block_t *old_addr, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct node_info ni;
	int err;

retry:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto retry;
		}
		return err;
	}

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	if (recover) {
		/* dn.data_blkaddr is always valid */
		if (!__is_valid_data_blkaddr(new_addr)) {
			if (new_addr == NULL_ADDR)
				dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1);
			f2fs_update_data_blkaddr(&dn, new_addr);
		} else {
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
				new_addr, ni.version, true, true);
		}
	} else {
		blkcnt_t count = 1;

		err = inc_valid_block_count(sbi, inode, &count, true);
		if (err) {
			f2fs_put_dnode(&dn);
			return err;
		}

		*old_addr = dn.data_blkaddr;
		f2fs_truncate_data_blocks_range(&dn, 1);
		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);

		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
					ni.version, true, false);
	}

	f2fs_put_dnode(&dn);

	trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
			index, old_addr ? *old_addr : 0, new_addr, recover);
	return 0;
}

static void __complete_revoke_list(struct inode *inode, struct list_head *head,
					bool revoke)
{
	struct revoke_entry *cur, *tmp;
	pgoff_t start_index = 0;
	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);

	list_for_each_entry_safe(cur, tmp, head, list) {
		if (revoke) {
			__replace_atomic_write_block(inode, cur->index,
						cur->old_addr, NULL, true);
		} else if (truncate) {
			f2fs_truncate_hole(inode, start_index, cur->index);
			start_index = cur->index + 1;
		}

		list_del(&cur->list);
		kmem_cache_free(revoke_entry_slab, cur);
	}

	if (!revoke && truncate)
		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
}
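/*
 * Added note on the commit path below: __f2fs_commit_atomic_write() walks
 * the COW inode's data blocks in dnode-sized runs, moves each valid block
 * into the original inode via __replace_atomic_write_block(), and records
 * every displaced address in a revoke list. On failure the same list is
 * replayed with revoke = true to undo the partial commit.
 */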
static int __f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inode *cow_inode = fi->cow_inode;
	struct revoke_entry *new;
	struct list_head revoke_list;
	block_t blkaddr;
	struct dnode_of_data dn;
	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t off = 0, blen, index;
	int ret = 0, i;

	INIT_LIST_HEAD(&revoke_list);

	while (len) {
		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);

		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
		if (ret && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			if (dn.max_level == 0)
				goto out;
			goto next;
		}

		blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page,
					cow_inode), len);
		index = off;
		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
			blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr)) {
				continue;
			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				ret = -EFSCORRUPTED;
				goto out;
			}

			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
							true, NULL);

			ret = __replace_atomic_write_block(inode, index, blkaddr,
							&new->old_addr, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				kmem_cache_free(revoke_entry_slab, new);
				goto out;
			}

			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			new->index = index;
			list_add_tail(&new->list, &revoke_list);
		}

		f2fs_put_dnode(&dn);
next:
		off += blen;
		len -= blen;
	}

out:
	if (time_to_inject(sbi, FAULT_TIMEOUT))
		f2fs_io_schedule_timeout_killable(DEFAULT_FAULT_TIMEOUT);

	if (ret) {
		sbi->revoked_atomic_block += fi->atomic_write_cnt;
	} else {
		sbi->committed_atomic_block += fi->atomic_write_cnt;
		set_inode_flag(inode, FI_ATOMIC_COMMITTED);

		/*
		 * The inode may have no FI_ATOMIC_DIRTIED flag if there was
		 * no write before the commit.
		 */
		if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
			/* clear atomic dirty status and set vfs dirty status */
			clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
			f2fs_mark_inode_dirty_sync(inode, true);
		}
	}

	__complete_revoke_list(inode, &revoke_list, ret ? true : false);

	return ret;
}

int f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (err)
		return err;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_lock_op(sbi);

	err = __f2fs_commit_atomic_write(inode);

	f2fs_unlock_op(sbi);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (f2fs_cp_error(sbi))
		return;

	if (time_to_inject(sbi, FAULT_CHECKPOINT))
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);

	/* balance_fs_bg may be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return;

	/*
	 * We should do GC or end up with a checkpoint if there are too many
	 * dirty dir/node pages and not enough free segments.
	 */
	if (has_enough_free_secs(sbi, 0, 0))
		return;

	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
				sbi->gc_thread->f2fs_gc_task) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
					TASK_UNINTERRUPTIBLE);
		wake_up(&sbi->gc_thread->gc_wait_queue_head);
		io_schedule();
		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
	} else {
		struct f2fs_gc_control gc_control = {
			.victim_segno = NULL_SEGNO,
			.init_gc_type = BG_GC,
			.no_bg_gc = true,
			.should_migrate_blocks = false,
			.err_gc_skipped = false,
			.nr_free_secs = 1 };

		f2fs_down_write(&sbi->gc_lock);
		stat_inc_gc_call_count(sbi, FOREGROUND);
		f2fs_gc(sbi, &gc_control);
	}
}
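/*
 * Added note on the threshold below (symbolic, since DEFAULT_DIRTY_THRESHOLD
 * depends on the build): each dirty-page type is capped at
 * SEGS_TO_BLKS(sbi, factor * DEFAULT_DIRTY_THRESHOLD) and the combined total
 * at 1.5x that. Holding cp_rwsem bumps factor from 2 to 3, tolerating more
 * dirty pages while a checkpoint is already in flight.
 */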
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
	unsigned int threshold = SEGS_TO_BLKS(sbi,
					(factor * DEFAULT_DIRTY_THRESHOLD));
	unsigned int global_threshold = threshold * 3 / 2;

	if (dents >= threshold || qdata >= threshold ||
		nodes >= threshold || meta >= threshold ||
		imeta >= threshold)
		return true;
	return dents + qdata + nodes + meta + imeta > global_threshold;
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink the read extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
		f2fs_shrink_read_extent_tree(sbi,
				READ_EXTENT_CACHE_SHRINK_NUMBER);

	/* try to shrink the age extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
		f2fs_shrink_age_extent_tree(sbi,
				AGE_EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
		goto do_sync;

	/* there is background inflight IO or a recent foreground operation */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) &&
		 f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceeded the periodic checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* a checkpoint is the only way to shrink partially cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	stat_inc_cp_call_count(sbi, BACKGROUND);
	f2fs_sync_fs(sbi->sb, 1);
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	if (!ret)
		f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}
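/*
 * Added note on the merge protocol below: each waiter publishes a flush_cmd
 * on a lock-free llist. The first waiter (queued_flush == 1) issues the
 * flush itself; later waiters sleep on a per-command completion that the
 * flush thread fills in after one merged blkdev_issue_flush(), so N
 * concurrent fsyncs collapse into a single FLUSH to the device.
 */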
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
	    f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up issue_flush thread, this
	 * smp_mb() pairs with another barrier in ___wait_event(), see
	 * more details in comments of waitqueue_active().
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return 0;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return 0;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		int err = PTR_ERR(fcc->f2fs_issue_flush);

		fcc->f2fs_issue_flush = NULL;
		return err;
	}

	return 0;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FLUSH_FAIL);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi,
				(!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
				!valid_blocks) ||
				valid_blocks == CAP_BLKS_PER_SEC(sbi));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * No error such as -ENOMEM should occur here: adding a dirty entry to the
 * seglist is not a critical operation. If the given segment is one of the
 * current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}
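/*
 * Added note on segment states: a segment holding some valid blocks is
 * DIRTY and thus a garbage-collection candidate; once all of its blocks
 * become invalid it moves to PRE(free), and the next checkpoint makes it
 * allocatable again.
 */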
/*
 * Move currently empty dirty segments to prefree; takes seglist_lock
 * internally.
 */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = max(holes[DATA], holes[NODE]);
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));

	if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
		return 0;
	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	if (has_not_enough_free_secs(sbi, 0, 0))
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->di.lstart = lstart;
	dc->di.start = start;
	dc->di.len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}
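/*
 * Added note: pending discard commands created above are bucketed into
 * dcc->pend_list[] by length via plist_idx(), so the issue path can prefer
 * large ranges and honor the configured discard granularity.
 */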
static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *cur = rb_first_cached(&dcc->root), *next;
	struct discard_cmd *cur_dc, *next_dc;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
		next_dc = rb_entry(next, struct discard_cmd, rb_node);

		if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
			f2fs_info(sbi, "broken discard_rbtree, "
				"cur(%u, %u) next(%u, %u)",
				cur_dc->di.lstart, cur_dc->di.len,
				next_dc->di.lstart, next_dc->di.len);
			return false;
		}
		cur = next;
	}
#endif
	return true;
}

static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
						block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *node = dcc->root.rb_root.rb_node;
	struct discard_cmd *dc;

	while (node) {
		dc = rb_entry(node, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			node = node->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			node = node->rb_right;
		else
			return dc;
	}
	return NULL;
}

static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
				block_t blkaddr,
				struct discard_cmd **prev_entry,
				struct discard_cmd **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct discard_cmd *dc;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	while (*pnode) {
		parent = *pnode;
		dc = rb_entry(*pnode, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			pnode = &(*pnode)->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			pnode = &(*pnode)->rb_right;
		else
			goto lookup_neighbors;
	}

	*insert_p = pnode;
	*insert_parent = parent;

	dc = rb_entry(parent, struct discard_cmd, rb_node);
	tmp_node = parent;
	if (parent && blkaddr > dc->di.lstart)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	tmp_node = parent;
	if (parent && blkaddr < dc->di.lstart)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return NULL;

lookup_neighbors:
	/* lookup prev node for merging backward later */
	tmp_node = rb_prev(&dc->rb_node);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	/* lookup next node for merging frontward later */
	tmp_node = rb_next(&dc->rb_node);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->di.len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		f2fs_info_ratelimited(sbi,
			"Issue discard(%u, %u, %u) failed, ret: %d",
			dc->di.lstart, dc->di.start, dc->di.len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}
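/*
 * Added note on the discard command lifecycle used above and below:
 *   D_PREP    - created, sitting on a pend_list, no bio issued yet
 *   D_PARTIAL - some, but not all, of its split bios are in flight
 *   D_SUBMIT  - every bio submitted, command is on a wait list
 *   D_DONE    - last bio completed; f2fs_submit_discard_endio() wakes
 *               any waiters via complete_all()
 */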
block_t blk = start; unsigned long offset, size, *map; while (blk < end) { segno = GET_SEGNO(sbi, blk); sentry = get_seg_entry(sbi, segno); offset = GET_BLKOFF_FROM_SEG0(sbi, blk); if (end < START_BLOCK(sbi, segno + 1)) size = GET_BLKOFF_FROM_SEG0(sbi, end); else size = BLKS_PER_SEG(sbi); map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); blk = START_BLOCK(sbi, segno + 1); } #endif } static void __init_discard_policy(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, int discard_type, unsigned int granularity) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; /* common policy */ dpolicy->type = discard_type; dpolicy->sync = true; dpolicy->ordered = false; dpolicy->granularity = granularity; dpolicy->max_requests = dcc->max_discard_request; dpolicy->io_aware_gran = dcc->discard_io_aware_gran; dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = dcc->min_discard_issue_time; dpolicy->mid_interval = dcc->mid_discard_issue_time; dpolicy->max_interval = dcc->max_discard_issue_time; if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE) dpolicy->io_aware = true; else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE) dpolicy->io_aware = false; dpolicy->sync = false; dpolicy->ordered = true; if (utilization(sbi) > dcc->discard_urgent_util) { dpolicy->granularity = MIN_DISCARD_GRANULARITY; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = dcc->min_discard_issue_time; } } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = dcc->min_discard_issue_time; dpolicy->mid_interval = dcc->mid_discard_issue_time; dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = MIN_DISCARD_GRANULARITY; dpolicy->timeout = true; } } static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len); #ifdef CONFIG_BLK_DEV_ZONED static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc, blk_opf_t flag, struct list_head *wait_list, unsigned int *issued) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct block_device *bdev = dc->bdev; struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS); unsigned long flags; trace_f2fs_issue_reset_zone(bdev, dc->di.start); spin_lock_irqsave(&dc->lock, flags); dc->state = D_SUBMIT; dc->bio_ref++; spin_unlock_irqrestore(&dc->lock, flags); if (issued) (*issued)++; atomic_inc(&dcc->queued_discard); dc->queued++; list_move_tail(&dc->list, wait_list); /* sanity check on discard range */ __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len); bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start); bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; submit_bio(bio); atomic_inc(&dcc->issued_discard); f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE); } #endif /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, struct discard_cmd *dc, int *issued) { struct block_device *bdev = dc->bdev; unsigned int max_discard_blocks = SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev)); struct discard_cmd_control *dcc = 
SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
		int devi = f2fs_bdev_index(sbi, bdev);

		if (devi < 0)
			return -EINVAL;

		if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
			__submit_zone_reset_cmd(sbi, dc, flag,
						wait_list, issued);
			return 0;
		}
	}
#endif

	/*
	 * Stop issuing discard for any of the below cases:
	 * 1. the device is a conventional zone, but it doesn't support
	 *    discard.
	 * 2. the device is a regular device which, after a snapshot, no
	 *    longer supports discard.
	 */
	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;

	trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);

	lstart = dc->di.lstart;
	start = dc->di.start;
	len = dc->di.len;
	total_len = len;

	dc->di.len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->di.len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			err = -EIO;
		} else {
			err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, &bio);
		}
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);
			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * The state should be set before submission to avoid
		 * hitting D_DONE right away.
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}

static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p = &dcc->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc;
	bool leftmost = true;

	/* look up the rb tree to find the parent node */
	while (*p) {
		parent = *p;
		dc = rb_entry(parent, struct discard_cmd, rb_node);

		if (lstart < dc->di.lstart) {
			p = &(*p)->rb_left;
		} else if (lstart >= dc->di.lstart + dc->di.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* skip adding a command that already exists */
			return;
		}
	}

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
				struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool
modified = false; if (dc->state == D_DONE || dc->di.len == 1) { __remove_discard_cmd(sbi, dc); return; } dcc->undiscard_blks -= di.len; if (blkaddr > di.lstart) { dc->di.len = blkaddr - dc->di.lstart; dcc->undiscard_blks += dc->di.len; __relocate_discard_cmd(dcc, dc); modified = true; } if (blkaddr < di.lstart + di.len - 1) { if (modified) { __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr); } else { dc->di.lstart++; dc->di.len--; dc->di.start++; dcc->undiscard_blks += dc->di.len; __relocate_discard_cmd(dcc, dc); } } } static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; struct discard_cmd *dc; struct discard_info di = {0}; struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int max_discard_blocks = SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev)); block_t end = lstart + len; dc = __lookup_discard_cmd_ret(&dcc->root, lstart, &prev_dc, &next_dc, &insert_p, &insert_parent); if (dc) prev_dc = dc; if (!prev_dc) { di.lstart = lstart; di.len = next_dc ? next_dc->di.lstart - lstart : len; di.len = min(di.len, len); di.start = start; } while (1) { struct rb_node *node; bool merged = false; struct discard_cmd *tdc = NULL; if (prev_dc) { di.lstart = prev_dc->di.lstart + prev_dc->di.len; if (di.lstart < lstart) di.lstart = lstart; if (di.lstart >= end) break; if (!next_dc || next_dc->di.lstart > end) di.len = end - di.lstart; else di.len = next_dc->di.lstart - di.lstart; di.start = start + di.lstart - lstart; } if (!di.len) goto next; if (prev_dc && prev_dc->state == D_PREP && prev_dc->bdev == bdev && __is_discard_back_mergeable(&di, &prev_dc->di, max_discard_blocks)) { prev_dc->di.len += di.len; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); di = prev_dc->di; tdc = prev_dc; merged = true; } if (next_dc && next_dc->state == D_PREP && next_dc->bdev == bdev && __is_discard_front_mergeable(&di, &next_dc->di, max_discard_blocks)) { next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); merged = true; } if (!merged) __insert_discard_cmd(sbi, bdev, di.lstart, di.start, di.len); next: prev_dc = next_dc; if (!prev_dc) break; node = rb_next(&prev_dc->rb_node); next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); } } #ifdef CONFIG_BLK_DEV_ZONED static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t lblkstart, block_t blklen) { trace_f2fs_queue_reset_zone(bdev, blkstart); mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); } #endif static void __queue_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { block_t lblkstart = blkstart; if (!f2fs_bdev_support_discard(bdev)) return; trace_f2fs_queue_discard(bdev, blkstart, blklen); if (f2fs_is_multi_device(sbi)) { int devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; } mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); } static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, 
struct discard_policy *dpolicy, int *issued) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; struct blk_plug plug; bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos, &prev_dc, &next_dc, &insert_p, &insert_parent); if (!dc) dc = next_dc; blk_start_plug(&plug); while (dc) { struct rb_node *node; int err = 0; if (dc->state != D_PREP) goto next; if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } dcc->next_pos = dc->di.lstart + dc->di.len; err = __submit_discard_cmd(sbi, dpolicy, dc, issued); if (*issued >= dpolicy->max_requests) break; next: node = rb_next(&dc->rb_node); if (err) __remove_discard_cmd(sbi, dc); dc = rb_entry_safe(node, struct discard_cmd, rb_node); } blk_finish_plug(&plug); if (!dc) dcc->next_pos = 0; mutex_unlock(&dcc->cmd_lock); if (!(*issued) && io_interrupted) *issued = -1; } static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy); static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; int i, issued; bool io_interrupted = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); retry: issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (i + 1 < dpolicy->granularity) break; if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) { __issue_discard_cmd_orderly(sbi, dpolicy, &issued); return issued; } pend_list = &dcc->pend_list[i]; mutex_lock(&dcc->cmd_lock); if (list_empty(pend_list)) goto next; if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } __submit_discard_cmd(sbi, dpolicy, dc, &issued); if (issued >= dpolicy->max_requests) break; } blk_finish_plug(&plug); next: mutex_unlock(&dcc->cmd_lock); if (issued >= dpolicy->max_requests || io_interrupted) break; } if (dpolicy->type == DPOLICY_UMOUNT && issued) { __wait_all_discard_cmd(sbi, dpolicy); goto retry; } if (!issued && io_interrupted) issued = -1; return issued; } static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { pend_list = &dcc->pend_list[i]; list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); dropped = true; } } mutex_unlock(&dcc->cmd_lock); return dropped; } void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi) { __drop_discard_cmd(sbi); } static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; unsigned int len = 0; wait_for_completion_io(&dc->wait); mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, dc->state != D_DONE); dc->ref--; if (!dc->ref) { if 
(!dc->error) len = dc->di.len; __remove_discard_cmd(sbi, dc); } mutex_unlock(&dcc->cmd_lock); return len; } static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, block_t start, block_t end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc = NULL, *iter, *tmp; unsigned int trimmed = 0; next: dc = NULL; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(iter, tmp, wait_list, list) { if (iter->di.lstart + iter->di.len <= start || end <= iter->di.lstart) continue; if (iter->di.len < dpolicy->granularity) continue; if (iter->state == D_DONE && !iter->ref) { wait_for_completion_io(&iter->wait); if (!iter->error) trimmed += iter->di.len; __remove_discard_cmd(sbi, iter); } else { iter->ref++; dc = iter; break; } } mutex_unlock(&dcc->cmd_lock); if (dc) { trimmed += __wait_one_discard_bio(sbi, dc); goto next; } return trimmed; } static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { struct discard_policy dp; unsigned int discard_blks; if (dpolicy) return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); /* wait all */ __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY); discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY); discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); return discard_blks; } /* This should be covered by global mutex, &sit_i->sentry_lock */ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; bool need_wait = false; mutex_lock(&dcc->cmd_lock); dc = __lookup_discard_cmd(sbi, blkaddr); #ifdef CONFIG_BLK_DEV_ZONED if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { int devi = f2fs_bdev_index(sbi, dc->bdev); if (devi < 0) { mutex_unlock(&dcc->cmd_lock); return; } if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) { /* force submit zone reset */ if (dc->state == D_PREP) __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, &dcc->wait_list, NULL); dc->ref++; mutex_unlock(&dcc->cmd_lock); /* wait zone reset */ __wait_one_discard_bio(sbi, dc); return; } } #endif if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); } else { dc->ref++; need_wait = true; } } mutex_unlock(&dcc->cmd_lock); if (need_wait) __wait_one_discard_bio(sbi, dc); } void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (dcc && dcc->f2fs_issue_discard) { struct task_struct *discard_thread = dcc->f2fs_issue_discard; dcc->f2fs_issue_discard = NULL; kthread_stop(discard_thread); } } /** * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT * @sbi: the f2fs_sb_info data for discard cmd to issue * * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped * * Return true if issued all discard cmd or no discard cmd need issue, otherwise return false. 
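 *
 * A rough usage sketch at umount time (illustrative only; the warning
 * message is hypothetical, not an existing caller):
 *
 *	if (!f2fs_issue_discard_timeout(sbi))
 *		f2fs_warn(sbi, "dropped pending discard commands");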
*/ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; bool dropped; if (!atomic_read(&dcc->discard_cmd_cnt)) return true; __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); /* just to make sure there is no pending discard commands */ __wait_all_discard_cmd(sbi, NULL); f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); return !dropped; } static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy dpolicy; unsigned int wait_ms = dcc->min_discard_issue_time; int issued; set_freezable(); do { wait_event_freezable_timeout(*q, kthread_should_stop() || dcc->discard_wake, msecs_to_jiffies(wait_ms)); if (sbi->gc_mode == GC_URGENT_HIGH || !f2fs_available_free_memory(sbi, DISCARD_CACHE)) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, MIN_DISCARD_GRANULARITY); else __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity); if (dcc->discard_wake) dcc->discard_wake = false; /* clean up pending candidates before going to sleep */ if (atomic_read(&dcc->queued_discard)) __wait_all_discard_cmd(sbi, NULL); if (f2fs_readonly(sbi->sb)) continue; if (kthread_should_stop()) return 0; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || !atomic_read(&dcc->discard_cmd_cnt)) { wait_ms = dpolicy.max_interval; continue; } sb_start_intwrite(sbi->sb); issued = __issue_discard_cmd(sbi, &dpolicy); if (issued > 0) { __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1) { wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); if (!wait_ms) wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } if (!atomic_read(&dcc->discard_cmd_cnt)) wait_ms = dpolicy.max_interval; sb_end_intwrite(sbi->sb); } while (!kthread_should_stop()); return 0; } #ifdef CONFIG_BLK_DEV_ZONED static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { sector_t sector, nr_sects; block_t lblkstart = blkstart; int devi = 0; u64 remainder = 0; if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkstart); if (blkstart < FDEV(devi).start_blk || blkstart > FDEV(devi).end_blk) { f2fs_err(sbi, "Invalid block %x", blkstart); return -EIO; } blkstart -= FDEV(devi).start_blk; } /* For sequential zones, reset the zone write pointer */ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) { sector = SECTOR_FROM_BLOCK(blkstart); nr_sects = SECTOR_FROM_BLOCK(blklen); div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder); if (remainder || nr_sects != bdev_zone_sectors(bdev)) { f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", devi, sbi->s_ndevs ? 
FDEV(devi).path : "", blkstart, blklen); return -EIO; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { unsigned int nofs_flags; int ret; trace_f2fs_issue_reset_zone(bdev, blkstart); nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, sector, nr_sects); memalloc_nofs_restore(nofs_flags); return ret; } __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); return 0; } /* For conventional zones, use regular discard if supported */ __queue_discard_cmd(sbi, bdev, lblkstart, blklen); return 0; } #endif static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif __queue_discard_cmd(sbi, bdev, blkstart, blklen); return 0; } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { sector_t start = blkstart, len = 0; struct block_device *bdev; struct seg_entry *se; unsigned int offset; block_t i; int err = 0; bdev = f2fs_target_device(sbi, blkstart, NULL); for (i = blkstart; i < blkstart + blklen; i++, len++) { if (i != start) { struct block_device *bdev2 = f2fs_target_device(sbi, i, NULL); if (bdev2 != bdev) { err = __issue_discard_async(sbi, bdev, start, len); if (err) return err; bdev = bdev2; start = i; len = 0; } } se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); offset = GET_BLKOFF_FROM_SEG0(sbi, i); if (f2fs_block_unit_discard(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } if (len) err = __issue_discard_async(sbi, bdev, start, len); return err; } static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason & CP_DISCARD); struct discard_entry *de = NULL; struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; if (se->valid_blocks == BLKS_PER_SEG(sbi) || !f2fs_hw_support_discard(sbi) || !f2fs_block_unit_discard(sbi)) return false; if (!force) { if (!f2fs_realtime_discard_enable(sbi) || (!se->valid_blocks && !IS_CURSEG(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) dmap[i] = force ? 
~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->dcc_info->nr_discards <= SM_I(sbi)->dcc_info->max_discards) { start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1); if (start >= BLKS_PER_SEG(sbi)) break; end = __find_rev_next_zero_bit(dmap, BLKS_PER_SEG(sbi), start + 1); if (force && start && end != BLKS_PER_SEG(sbi) && (end - start) < cpc->trim_minlen) continue; if (check_only) return true; if (!de) { de = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_F2FS_ZERO, true, NULL); de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); list_add_tail(&de->list, head); } for (i = start; i < end; i++) __set_bit_le(i, (void *)de->discard_map); SM_I(sbi)->dcc_info->nr_discards += end - start; } return false; } static void release_discard_addr(struct discard_entry *entry) { list_del(&entry->list); kmem_cache_free(discard_entry_slab, entry); } void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi) { struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ list_for_each_entry_safe(entry, this, head, list) release_discard_addr(entry); } /* * Should call f2fs_clear_prefree_segments after checkpoint is done. */ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int segno; mutex_lock(&dirty_i->seglist_lock); for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) __set_test_and_free(sbi, segno, false); mutex_unlock(&dirty_i->seglist_lock); } void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *head = &dcc->entry_list; struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); bool section_alignment = F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION; if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) section_alignment = true; mutex_lock(&dirty_i->seglist_lock); while (1) { int i; if (section_alignment && end != -1) end--; start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); if (start >= MAIN_SEGS(sbi)) break; end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), start + 1); if (section_alignment) { start = rounddown(start, SEGS_PER_SEC(sbi)); end = roundup(end, SEGS_PER_SEC(sbi)); } for (i = start; i < end; i++) { if (test_and_clear_bit(i, prefree_map)) dirty_i->nr_dirty[PRE]--; } if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && (end - 1) <= cpc->trim_end) continue; /* Should cover 2MB zoned device for zone-based reset */ if (!f2fs_sb_has_blkzoned(sbi) && (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), SEGS_TO_BLKS(sbi, end - start)); continue; } next: secno = GET_SEC_FROM_SEG(sbi, start); start_segno = GET_SEG_FROM_SEC(sbi, secno); if (!IS_CURSEC(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), BLKS_PER_SEC(sbi)); start = start_segno + SEGS_PER_SEC(sbi); if (start < end) goto next; else end = start - 1; } mutex_unlock(&dirty_i->seglist_lock); if (!f2fs_block_unit_discard(sbi)) goto wakeup; /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { unsigned int cur_pos = 0, next_pos, len, total_len = 0; bool is_valid = 
test_bit_le(0, entry->discard_map); find_next: if (is_valid) { next_pos = find_next_zero_bit_le(entry->discard_map, BLKS_PER_SEG(sbi), cur_pos); len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, len); total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, BLKS_PER_SEG(sbi), cur_pos); } skip: cur_pos = next_pos; is_valid = !is_valid; if (cur_pos < BLKS_PER_SEG(sbi)) goto find_next; release_discard_addr(entry); dcc->nr_discards -= total_len; } wakeup: wake_up_discard_thread(sbi, false); } int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; int err = 0; if (f2fs_sb_has_readonly(sbi)) { f2fs_info(sbi, "Skip to start discard thread for readonly image"); return 0; } if (!f2fs_realtime_discard_enable(sbi)) return 0; dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); dcc->f2fs_issue_discard = NULL; } return err; } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc; int err = 0, i; if (SM_I(sbi)->dcc_info) { dcc = SM_I(sbi)->dcc_info; goto init_thread; } dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); if (!dcc) return -ENOMEM; dcc->discard_io_aware_gran = MAX_PLIST_NUM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT || F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) dcc->discard_granularity = BLKS_PER_SEG(sbi); INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); INIT_LIST_HEAD(&dcc->wait_list); INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi)); dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; dcc->rbtree_check = false; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: err = f2fs_start_discard_thread(sbi); if (err) { kfree(dcc); SM_I(sbi)->dcc_info = NULL; } return err; } static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (!dcc) return; f2fs_stop_discard_thread(sbi); /* * Recovery can cache discard commands, so in error path of * fill_super(), it needs to give a chance to handle them. 
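 * f2fs_issue_discard_timeout() below drains and drops them before the
 * control structure is freed.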
*/ f2fs_issue_discard_timeout(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { sit_i->dirty_sentries++; return false; } return true; } static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, unsigned int segno, int modified) { struct seg_entry *se = get_seg_entry(sbi, segno); se->type = type; if (modified) __mark_sit_entry_dirty(sbi, segno); } static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr) { unsigned int segno = GET_SEGNO(sbi, blkaddr); if (segno == NULL_SEGNO) return 0; return get_seg_entry(sbi, segno)->mtime; } static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned long long old_mtime) { struct seg_entry *se; unsigned int segno = GET_SEGNO(sbi, blkaddr); unsigned long long ctime = get_mtime(sbi, false); unsigned long long mtime = old_mtime ? old_mtime : ctime; if (segno == NULL_SEGNO) return; se = get_seg_entry(sbi, segno); if (!se->mtime) se->mtime = mtime; else se->mtime = div_u64(se->mtime * se->valid_blocks + mtime, se->valid_blocks + 1); if (ctime > SIT_I(sbi)->max_mtime) SIT_I(sbi)->max_mtime = ctime; } /* * NOTE: when updating multiple blocks at the same time, please ensure * that the consecutive input blocks belong to the same segment. */ static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, unsigned int segno, block_t blkaddr, unsigned int offset, int del) { bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif int i; int del_count = -del; f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); for (i = 0; i < del_count; i++) { exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS mir_exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", blkaddr + i, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); f2fs_bug_on(sbi, 1); se->valid_blocks++; del += 1; } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { /* * If checkpoints are off, we must not reuse data that * was used in the previous checkpoint. If it was used * before, we must track that to know how much space we * really have. 
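			 * Such blocks are counted in
			 * sbi->unusable_block_count below; see
			 * f2fs_get_unusable_blocks() above.
			 */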
*/ if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { spin_lock(&sbi->stat_lock); sbi->unusable_block_count++; spin_unlock(&sbi->stat_lock); } } if (f2fs_block_unit_discard(sbi) && f2fs_test_and_clear_bit(offset + i, se->discard_map)) sbi->discard_blks++; if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) { se->ckpt_valid_blocks -= 1; if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->ckpt_valid_blocks -= 1; } } if (__is_large_section(sbi)) sanity_check_valid_blocks(sbi, segno); return del; } static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, unsigned int segno, block_t blkaddr, unsigned int offset, int del) { bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; } if (f2fs_block_unit_discard(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* * SSR should never reuse block which is checkpointed * or newly invalidated. */ if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) { se->ckpt_valid_blocks++; if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->ckpt_valid_blocks++; } } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) { se->ckpt_valid_blocks += del; if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->ckpt_valid_blocks += del; } if (__is_large_section(sbi)) sanity_check_valid_blocks(sbi, segno); return del; } /* * If releasing blocks, this function supports updating multiple consecutive blocks * at one time, but please note that these consecutive blocks need to belong to the * same segment. 
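 * f2fs_invalidate_blocks() below follows this rule by splitting a
 * multi-segment range at segment boundaries before each call.
 */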
*/ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) { struct seg_entry *se; unsigned int segno, offset; long int new_vblocks; segno = GET_SEGNO(sbi, blkaddr); if (segno == NULL_SEGNO) return; se = get_seg_entry(sbi, segno); new_vblocks = se->valid_blocks + del; offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); f2fs_bug_on(sbi, (new_vblocks < 0 || (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); se->valid_blocks = new_vblocks; /* Update valid block bitmap */ if (del > 0) { del = update_sit_entry_for_alloc(sbi, se, segno, blkaddr, offset, del); } else { del = update_sit_entry_for_release(sbi, se, segno, blkaddr, offset, del); } __mark_sit_entry_dirty(sbi, segno); /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); block_t addr_start = addr, addr_end = addr + len - 1; unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); if (seg_num == 1) cnt = len; else cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); do { update_segment_mtime(sbi, addr_start, 0); update_sit_entry(sbi, addr_start, -cnt); /* add it into dirty seglist */ locate_dirty_segment(sbi, segno); /* update @addr_start and @cnt and @segno */ addr_start = START_BLOCK(sbi, ++segno); if (++i == seg_num) cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; else cnt = max_blocks; } while (i <= seg_num); up_write(&sit_i->sentry_lock); } bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) { struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, offset; struct seg_entry *se; bool is_cp = false; if (!__is_valid_data_blkaddr(blkaddr)) return true; down_read(&sit_i->sentry_lock); segno = GET_SEGNO(sbi, blkaddr); se = get_seg_entry(sbi, segno); offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); if (f2fs_test_bit(offset, se->ckpt_valid_map)) is_cp = true; up_read(&sit_i->sentry_lock); return is_cp; } static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); if (sbi->ckpt->alloc_type[type] == SSR) return BLKS_PER_SEG(sbi); return curseg->next_blkoff; } /* * Calculate the number of current summary pages for writing */ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] != SSR && for_ra) valid_sum_count += le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]); else valid_sum_count += f2fs_curseg_valid_blocks(sbi, i); } sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE; if (valid_sum_count <= sum_in_page) return 1; else if ((valid_sum_count - sum_in_page) <= (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) return 2; return 3; } /* * Caller should put this summary folio */ struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno) { if (unlikely(f2fs_cp_error(sbi))) return ERR_PTR(-EIO); return f2fs_get_meta_folio_retry(sbi, GET_SUM_BLOCK(sbi, segno)); } void 
f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
	struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr);

	memcpy(folio_address(folio), src, PAGE_SIZE);
	folio_mark_dirty(folio);
	f2fs_folio_put(folio, true);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = folio_address(folio);
	memset(dst, 0, PAGE_SIZE);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	folio_mark_dirty(folio);
	f2fs_folio_put(folio, true);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi,
				struct curseg_info *curseg)
{
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment in the free segment bitmap, in the right order.
 * This function should always succeed; otherwise it is a BUG.
 */
static int get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, bool pinning)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	bool init = true;
	int i;
	int ret = 0;

	spin_lock(&free_i->segmap_lock);

	if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
		segno = find_next_zero_bit(free_i->free_segmap,
				GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * If we format f2fs on zoned storage, let's try to get pinned
	 * sections from the beginning of the storage, which should be a
	 * conventional one.
	 */
	if (f2fs_sb_has_blkzoned(sbi)) {
		/* Prioritize writing to conventional zones */
		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV ||
				pinning)
			segno = 0;
		else
			segno = max(sbi->first_seq_zone_segno, *newseg);
		hint = GET_SEC_FROM_SEG(sbi, segno);
	}
#endif

find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);

#ifdef CONFIG_BLK_DEV_ZONED
	if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
		/* Write only to sequential zones */
		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
			hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno);
			secno = find_next_zero_bit(free_i->free_secmap,
						MAIN_SECS(sbi), hint);
		} else
			secno = find_first_zero_bit(free_i->free_secmap,
						MAIN_SECS(sbi));
		if (secno >= MAIN_SECS(sbi)) {
			ret = -ENOSPC;
			f2fs_bug_on(sbi, 1);
			goto out_unlock;
		}
	}
#endif

	if (secno >= MAIN_SECS(sbi)) {
		secno = find_first_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi));
		if (secno >= MAIN_SECS(sbi)) {
			ret = -ENOSPC;
			f2fs_bug_on(sbi, !pinning);
			goto out_unlock;
		}
	}
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	if (test_bit(segno, free_i->free_segmap)) {
		ret = -EFSCORRUPTED;
		f2fs_stop_checkpoint(sbi, false,
			STOP_CP_REASON_CORRUPTED_FREE_BITMAP);
		goto out_unlock;
	}

	/* no free section in conventional device or conventional zone */
	if (new_sec && pinning &&
		f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	__set_inuse(sbi, segno);
	*newseg = segno;
out_unlock:
	spin_unlock(&free_i->segmap_lock);

	if (ret == -ENOSPC && !pinning)
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
	return ret;
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;
	unsigned short seg_type = curseg->seg_type;

	/* this only happens when get_new_segment() fails */
	if (curseg->next_segno == NULL_SEGNO)
		return;

	curseg->inited = true;
	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));

	sanity_check_seg_type(sbi, seg_type);

	if (IS_DATASEG(seg_type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(seg_type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}

static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned short seg_type = curseg->seg_type;

	sanity_check_seg_type(sbi, seg_type);
	if (__is_large_section(sbi)) {
		if (f2fs_need_rand_seg(sbi)) {
			unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);

			if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
				return curseg->segno;
			return get_random_u32_inclusive(curseg->segno + 1,
					GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
		}
		return curseg->segno;
	} else if (f2fs_need_rand_seg(sbi)) {
		return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
	}

	/* inmem log may not
locate on any segment after mount */ if (!curseg->inited) return 0; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; /* find segments from 0 to reuse freed segments */ if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) return 0; return curseg->segno; } static void reset_curseg_fields(struct curseg_info *curseg) { curseg->inited = false; curseg->segno = NULL_SEGNO; curseg->next_segno = 0; } /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. */ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno = curseg->segno; bool pinning = type == CURSEG_COLD_DATA_PINNED; int ret; if (curseg->inited) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); segno = __get_next_segno(sbi, type); ret = get_new_segment(sbi, &segno, new_sec, pinning); if (ret) { if (ret == -ENOSPC) reset_curseg_fields(curseg); return ret; } curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); return 0; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, int segno, block_t start) { struct seg_entry *se = get_seg_entry(sbi, segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; int i; for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start); } static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, struct curseg_info *seg) { return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1); } bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) { return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi); } /* * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ static int change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int new_segno = curseg->next_segno; struct f2fs_summary_block *sum_node; struct folio *sum_folio; if (curseg->inited) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); __set_test_and_inuse(sbi, new_segno); mutex_lock(&dirty_i->seglist_lock); __remove_dirty_segment(sbi, new_segno, PRE); __remove_dirty_segment(sbi, new_segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); reset_curseg(sbi, type, 1); curseg->alloc_type = SSR; curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); sum_folio = f2fs_get_sum_folio(sbi, new_segno); if (IS_ERR(sum_folio)) { /* GC won't be able to use stale summary pages by cp_error */ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); return PTR_ERR(sum_folio); } sum_node = folio_address(sum_folio); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_folio_put(sum_folio, true); return 0; } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age); static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, int 
target_type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); int ret = 0; curseg->seg_type = target_type; if (get_ssr_segment(sbi, type, alloc_mode, age)) { struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; ret = change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; ret = new_curseg(sbi, type, true); } stat_inc_seg_type(sbi, curseg); return ret; } static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); int ret = 0; if (!sbi->am.atgc_enabled && !force) return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) { return __f2fs_init_atgc_curseg(sbi, false); } int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi) { int ret; if (!test_opt(sbi, ATGC)) return 0; if (sbi->am.atgc_enabled) return 0; if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) < sbi->am.age_threshold) return 0; ret = __f2fs_init_atgc_curseg(sbi, true); if (!ret) { sbi->am.atgc_enabled = true; f2fs_info(sbi, "reenabled age threshold GC"); } return ret; } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); if (!curseg->inited) goto out; if (get_valid_blocks(sbi, curseg->segno, false)) { write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); } else { mutex_lock(&DIRTY_I(sbi)->seglist_lock); __set_test_and_free(sbi, curseg->segno, true); mutex_unlock(&DIRTY_I(sbi)->seglist_lock); } out: mutex_unlock(&curseg->curseg_mutex); } void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi) { __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); if (sbi->am.atgc_enabled) __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); } static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); if (!curseg->inited) goto out; if (get_valid_blocks(sbi, curseg->segno, false)) goto out; mutex_lock(&DIRTY_I(sbi)->seglist_lock); __set_test_and_inuse(sbi, curseg->segno); mutex_unlock(&DIRTY_I(sbi)->seglist_lock); out: mutex_unlock(&curseg->curseg_mutex); } void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi) { __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED); if (sbi->am.atgc_enabled) __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC); } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned segno = NULL_SEGNO; unsigned short seg_type = curseg->seg_type; int i, cnt; bool reversed = false; sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(seg_type)) { if (seg_type >= CURSEG_WARM_NODE) { reversed = true; i = CURSEG_COLD_NODE; } else { i = CURSEG_HOT_NODE; } cnt = NR_CURSEG_NODE_TYPE; } else { if (seg_type >= CURSEG_WARM_DATA) { reversed = true; i = 
CURSEG_COLD_DATA; } else { i = CURSEG_HOT_DATA; } cnt = NR_CURSEG_DATA_TYPE; } for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } } /* find valid_blocks=0 in dirty list */ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { segno = get_free_segment(sbi); if (segno != NULL_SEGNO) { curseg->next_segno = segno; return 1; } } return 0; } static bool need_new_seg(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && curseg->seg_type == CURSEG_WARM_NODE) return true; if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) return true; return false; } int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); segno = CURSEG_I(sbi, type)->segno; if (segno < start || segno > end) goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) ret = change_curseg(sbi, type); else ret = new_curseg(sbi, type, true); stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, segno); unlock: up_write(&SIT_I(sbi)->sentry_lock); if (segno != curseg->segno) f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; int err = 0; if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited) goto allocate; if (!force && curseg->inited && !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) return 0; allocate: old_segno = curseg->segno; err = new_curseg(sbi, type, true); if (err) return err; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); return 0; } int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { int ret; f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); ret = __allocate_new_segment(sbi, type, true, force); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) { int err; bool gc_required = true; retry: f2fs_lock_op(sbi); err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { f2fs_down_write(&sbi->gc_lock); err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1, true, ZONED_PIN_SEC_REQUIRED_COUNT); f2fs_up_write(&sbi->gc_lock); gc_required = false; if (!err) goto retry; } return err; } int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; int err = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) err += __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); return err; } bool 
f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) { __u64 trim_start = cpc->trim_start; bool has_candidate = false; down_write(&SIT_I(sbi)->sentry_lock); for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { if (add_discard_addrs(sbi, cpc, true)) { has_candidate = true; break; } } up_write(&SIT_I(sbi)->sentry_lock); cpc->trim_start = trim_start; return has_candidate; } static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, unsigned int start, unsigned int end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; struct blk_plug plug; int issued; unsigned int trimmed = 0; next: issued = 0; mutex_lock(&dcc->cmd_lock); if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi)); dc = __lookup_discard_cmd_ret(&dcc->root, start, &prev_dc, &next_dc, &insert_p, &insert_parent); if (!dc) dc = next_dc; blk_start_plug(&plug); while (dc && dc->di.lstart <= end) { struct rb_node *node; int err = 0; if (dc->di.len < dpolicy->granularity) goto skip; if (dc->state != D_PREP) { list_move_tail(&dc->list, &dcc->fstrim_list); goto skip; } err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); if (issued >= dpolicy->max_requests) { start = dc->di.lstart + dc->di.len; if (err) __remove_discard_cmd(sbi, dc); blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto next; } skip: node = rb_next(&dc->rb_node); if (err) __remove_discard_cmd(sbi, dc); dc = rb_entry_safe(node, struct discard_cmd, rb_node); if (fatal_signal_pending(current)) break; } blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); return trimmed; } int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; block_t start_block, end_block; struct cp_control cpc; struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; if (end < MAIN_BLKADDR(sbi)) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); return -EFSCORRUPTED; } /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); if (need_align) { start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi)); end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1; } cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); cpc.trim_start = start_segno; cpc.trim_end = end_segno; if (sbi->discard_blks == 0) goto out; f2fs_down_write(&sbi->gc_lock); stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); f2fs_up_write(&sbi->gc_lock); if (err) goto out; /* * We filed discard candidates, but actually we don't need to wait for * all of them, since they'll be issued in idle time along with runtime * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. 
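 * With the runtime discard option enabled we can return right below and
 * let the discard thread drain these candidates in the background.
 */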
*/ if (f2fs_realtime_discard_enable(sbi)) goto out; start_block = START_BLOCK(sbi, start_segno); end_block = START_BLOCK(sbi, end_segno + 1); __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); trimmed = __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); trimmed += __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); out: if (!err) range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint) { if (F2FS_OPTION(sbi).active_logs == 2) return CURSEG_HOT_DATA; else if (F2FS_OPTION(sbi).active_logs == 4) return CURSEG_COLD_DATA; /* active_logs == 6 */ switch (hint) { case WRITE_LIFE_SHORT: return CURSEG_HOT_DATA; case WRITE_LIFE_EXTREME: return CURSEG_COLD_DATA; default: return CURSEG_WARM_DATA; } } /* * This returns write hints for each segment type. These hints will be * passed down to the block layer as below by default. * * User F2FS Block * ---- ---- ----- * META WRITE_LIFE_NONE|REQ_META * HOT_NODE WRITE_LIFE_NONE * WARM_NODE WRITE_LIFE_MEDIUM * COLD_NODE WRITE_LIFE_LONG * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME * extension list " " * * -- buffered io * COLD_DATA WRITE_LIFE_EXTREME * HOT_DATA WRITE_LIFE_SHORT * WARM_DATA WRITE_LIFE_NOT_SET * * -- direct io * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET * WRITE_LIFE_NONE " WRITE_LIFE_NONE * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM * WRITE_LIFE_LONG " WRITE_LIFE_LONG * * (A userspace usage sketch is appended after this file.) */ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { switch (type) { case DATA: switch (temp) { case WARM: return WRITE_LIFE_NOT_SET; case HOT: return WRITE_LIFE_SHORT; case COLD: return WRITE_LIFE_EXTREME; default: return WRITE_LIFE_NONE; } case NODE: switch (temp) { case WARM: return WRITE_LIFE_MEDIUM; case HOT: return WRITE_LIFE_NONE; case COLD: return WRITE_LIFE_LONG; default: return WRITE_LIFE_NONE; } case META: return WRITE_LIFE_NONE; default: return WRITE_LIFE_NONE; } } static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) return CURSEG_HOT_DATA; else return CURSEG_HOT_NODE; } static int __get_segment_type_4(struct f2fs_io_info *fio) { if (fio->type == DATA) { struct inode *inode = fio_inode(fio); if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; else return CURSEG_COLD_DATA; } else { if (IS_DNODE(fio->page) && is_cold_node(fio->page)) return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } } static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_info ei = {}; if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { if (!ei.age) return NO_CHECK_TYPE; if (ei.age <= sbi->hot_data_age_threshold) return CURSEG_HOT_DATA; if (ei.age <= sbi->warm_data_age_threshold) return CURSEG_WARM_DATA; return CURSEG_COLD_DATA; } return NO_CHECK_TYPE; } static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { struct inode *inode = fio_inode(fio); int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return CURSEG_COLD_DATA_PINNED; if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && (fio->sbi->gc_mode != GC_URGENT_HIGH) && __is_valid_data_blkaddr(fio->old_blkaddr) && !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; } if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA;
type = __get_age_segment_type(inode, page_folio(fio->page)->index); if (type != NO_CHECK_TYPE) return type; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_cow_file(inode)) return CURSEG_HOT_DATA; return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); } else { if (IS_DNODE(fio->page)) return is_cold_node(fio->page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; return CURSEG_COLD_NODE; } } enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, enum log_type type) { struct curseg_info *curseg = CURSEG_I(sbi, type); enum temp_type temp = COLD; switch (curseg->seg_type) { case CURSEG_HOT_NODE: case CURSEG_HOT_DATA: temp = HOT; break; case CURSEG_WARM_NODE: case CURSEG_WARM_DATA: temp = WARM; break; case CURSEG_COLD_NODE: case CURSEG_COLD_DATA: temp = COLD; break; default: f2fs_bug_on(sbi, 1); } return temp; } static int __get_segment_type(struct f2fs_io_info *fio) { enum log_type type = CURSEG_HOT_DATA; switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; case 4: type = __get_segment_type_4(fio); break; case 6: type = __get_segment_type_6(fio); break; default: f2fs_bug_on(fio->sbi, true); } fio->temp = f2fs_get_segment_temp(fio->sbi, type); return type; } static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, struct curseg_info *seg) { /* To allocate block chunks in different sizes, use random number */ if (--seg->fragment_remained_chunk > 0) return; seg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); seg->next_blkoff += get_random_u32_inclusive(1, sbi->max_fragment_hole); } int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned long long old_mtime; bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; bool segment_full = false; int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); if (curseg->segno == NULL_SEGNO) { ret = -ENOSPC; goto out_err; } if (from_gc) { f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); sanity_check_seg_type(sbi, se->type); f2fs_bug_on(sbi, IS_NODESEG(se->type)); } *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi)); f2fs_wait_discard_bio(sbi, *new_blkaddr); curseg->sum_blk->entries[curseg->next_blkoff] = *sum; if (curseg->alloc_type == SSR) { curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg); } else { curseg->next_blkoff++; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) f2fs_randomize_chunk(sbi, curseg); } if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno)) segment_full = true; stat_inc_block_count(sbi, curseg); if (from_gc) { old_mtime = get_segment_mtime(sbi, old_blkaddr); } else { update_segment_mtime(sbi, old_blkaddr, 0); old_mtime = 0; } update_segment_mtime(sbi, *new_blkaddr, old_mtime); /* * SIT information should be updated before segment allocation, * since SSR needs latest valid block information. */ update_sit_entry(sbi, *new_blkaddr, 1); update_sit_entry(sbi, old_blkaddr, -1); /* * If the current segment is full, flush it out and replace it with a * new segment. 
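 * Roughly: need_new_seg() decides whether a fresh segment is claimed via * new_curseg(); otherwise an SSR victim is re-targeted with change_curseg(), * and ATGC writes take the get_atssr_segment() path instead.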
*/ if (segment_full) { if (type == CURSEG_COLD_DATA_PINNED && !((curseg->segno + 1) % sbi->segs_per_sec)) { write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); reset_curseg_fields(curseg); goto skip_new_segment; } if (from_gc) { ret = get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); } else { if (need_new_seg(sbi, type)) ret = new_curseg(sbi, type, false); else ret = change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } if (ret) goto out_err; } skip_new_segment: /* * segment dirty status should be updated after segment allocation, * so we just need to update status only one time after previous * segment being closed. */ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); if (IS_DATASEG(curseg->seg_type)) atomic64_inc(&sbi->allocated_data_blocks); up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(curseg->seg_type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); f2fs_inode_chksum_set(sbi, page); } if (fio) { struct f2fs_bio_info *io; INIT_LIST_HEAD(&fio->list); fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; spin_lock(&io->io_lock); list_add_tail(&fio->list, &io->io_list); spin_unlock(&io->io_lock); } mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return 0; out_err: *new_blkaddr = NULL_ADDR; up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, block_t blkaddr, unsigned int blkcnt) { if (!f2fs_is_multi_device(sbi)) return; while (1) { unsigned int devidx = f2fs_target_device_index(sbi, blkaddr); unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1; /* update device state for fsync */ f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO); /* update device state for checkpoint */ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { spin_lock(&sbi->dev_lock); f2fs_set_bit(devidx, (char *)&sbi->dirty_device); spin_unlock(&sbi->dev_lock); } if (blkcnt <= blks) break; blkcnt -= blks; blkaddr += blks; } } static int log_type_to_seg_type(enum log_type type) { int seg_type = CURSEG_COLD_DATA; switch (type) { case CURSEG_HOT_DATA: case CURSEG_WARM_DATA: case CURSEG_COLD_DATA: case CURSEG_HOT_NODE: case CURSEG_WARM_NODE: case CURSEG_COLD_NODE: seg_type = (int)type; break; case CURSEG_COLD_DATA_PINNED: case CURSEG_ALL_DATA_ATGC: seg_type = CURSEG_COLD_DATA; break; default: break; } return seg_type; } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { struct folio *folio = page_folio(fio->page); enum log_type type = __get_segment_type(fio); int seg_type = log_type_to_seg_type(type); bool keep_order = (f2fs_lfs_mode(fio->sbi) && seg_type == CURSEG_COLD_DATA); if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio)) { if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); folio_end_writeback(folio); if (f2fs_in_warm_node_list(fio->sbi, folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); goto out; } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); out: if (keep_order) f2fs_up_read(&fio->sbi->io_order_lock); } void 
f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, .type = META, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = folio->index, .new_blkaddr = folio->index, .page = folio_page(folio, 0), .encrypted_page = NULL, .in_list = 0, }; if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; folio_start_writeback(folio); f2fs_submit_page_write(&fio); stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE); } void f2fs_outplace_write_data(struct dnode_of_data *dn, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO) f2fs_update_age_extent_cache(dn); set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE); } int f2fs_inplace_write_data(struct f2fs_io_info *fio) { int err; struct f2fs_sb_info *sbi = fio->sbi; unsigned int segno; fio->new_blkaddr = fio->old_blkaddr; /* i/o temperature is needed for passing down write hints */ __get_segment_type(fio); segno = GET_SEGNO(sbi, fio->new_blkaddr); if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); goto drop_bio; } if (f2fs_cp_error(sbi)) { err = -EIO; goto drop_bio; } if (fio->meta_gc) f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi)) err = f2fs_merge_page_bio(fio); else err = f2fs_submit_page_bio(fio); if (!err) { f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); f2fs_update_iostat(fio->sbi, fio_inode(fio), fio->io_type, F2FS_BLKSIZE); } return err; drop_bio: if (fio->bio && *(fio->bio)) { struct bio *bio = *(fio->bio); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); *(fio->bio) = NULL; } return err; } static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, unsigned int segno) { int i; for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { if (CURSEG_I(sbi, i)->segno == segno) break; } return i; } void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr, bool from_gc) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int segno, old_cursegno; struct seg_entry *se; int type; unsigned short old_blkoff; unsigned char old_alloc_type; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); type = se->type; f2fs_down_write(&SM_I(sbi)->curseg_lock); if (!recover_curseg) { /* for recovery flow */ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { if (old_blkaddr == NULL_ADDR) type = CURSEG_COLD_DATA; else type = CURSEG_WARM_DATA; } } else { if (IS_CURSEG(sbi, segno)) { /* se->type is volatile as SSR allocation */ type = __f2fs_get_curseg(sbi, segno); f2fs_bug_on(sbi, type == NO_CHECK_TYPE); } else { type = CURSEG_WARM_DATA; } } curseg = 
CURSEG_I(sbi, type); f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type)); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; old_alloc_type = curseg->alloc_type; /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; if (change_curseg(sbi, type)) goto out_unlock; } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); curseg->sum_blk->entries[curseg->next_blkoff] = *sum; if (!recover_curseg || recover_newaddr) { if (!from_gc) update_segment_mtime(sbi, new_blkaddr, 0); update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); } locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); locate_dirty_segment(sbi, old_cursegno); if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; if (change_curseg(sbi, type)) goto out_unlock; } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; } out_unlock: up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr) { struct f2fs_summary sum; set_summary(&sum, dn->nid, dn->ofs_in_node, version); f2fs_do_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg, recover_newaddr, false); f2fs_update_data_blkaddr(dn, new_addr); } void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, bool ordered, bool locked) { if (folio_test_writeback(folio)) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type); /* submit cached IPU IO */ f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { folio_wait_writeback(folio); f2fs_bug_on(sbi, locked && folio_test_writeback(folio)); } else { folio_wait_stable(folio); } } } void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct folio *cfolio; if (!f2fs_meta_inode_gc_required(inode)) return; if (!__is_valid_data_blkaddr(blkaddr)) return; cfolio = filemap_lock_folio(META_MAPPING(sbi), blkaddr); if (!IS_ERR(cfolio)) { f2fs_folio_wait_writeback(cfolio, DATA, true, true); f2fs_folio_put(cfolio, true); } } void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; if (!f2fs_meta_inode_gc_required(inode)) return; for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; unsigned char *kaddr; struct folio *folio; block_t start; int i, j, offset; start = start_sum_block(sbi); folio = f2fs_get_meta_folio(sbi, start++); if (IS_ERR(folio)) return PTR_ERR(folio); kaddr = folio_address(folio); /* Step 1: restore nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE); /* Step 2: restore sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, 
SUM_JOURNAL_SIZE); offset = 2 * SUM_JOURNAL_SIZE; /* Step 3: restore summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blk_off; unsigned int segno; seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; seg_i->next_blkoff = blk_off; if (seg_i->alloc_type == SSR) blk_off = BLKS_PER_SEG(sbi); for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; s = (struct f2fs_summary *)(kaddr + offset); seg_i->sum_blk->entries[j] = *s; offset += SUMMARY_SIZE; if (offset + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; f2fs_folio_put(folio, true); folio = f2fs_get_meta_folio(sbi, start++); if (IS_ERR(folio)) return PTR_ERR(folio); kaddr = folio_address(folio); offset = 0; } } f2fs_folio_put(folio, true); return 0; } static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_summary_block *sum; struct curseg_info *curseg; struct folio *new; unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; int err = 0; /* get segment number and block addr */ if (IS_DATASEG(type)) { segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); } else { segno = le32_to_cpu(ckpt->cur_node_segno[type - CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else blk_addr = GET_SUM_BLOCK(sbi, segno); } new = f2fs_get_meta_folio(sbi, blk_addr); if (IS_ERR(new)) return PTR_ERR(new); sum = folio_address(new); if (IS_NODESEG(type)) { if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; } } else { err = f2fs_restore_node_summary(sbi, segno, sum); if (err) goto out; } } /* set uncompleted segment to curseg */ curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); /* update journal info */ down_write(&curseg->journal_rwsem); memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE); up_write(&curseg->journal_rwsem); memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE); memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE); curseg->next_segno = segno; reset_curseg(sbi, type, 0); curseg->alloc_type = ckpt->alloc_type[type]; curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); out: f2fs_folio_put(new, true); return err; } static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal; struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal; int type = CURSEG_HOT_DATA; int err; if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { int npages = f2fs_npages_for_summary_flush(sbi, true); if (npages >= 2) f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages, META_CP, true); /* restore for compacted data summary */ err = read_compacted_summaries(sbi); if (err) return err; type = CURSEG_HOT_NODE; } if (__exist_node_summaries(sbi)) f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type), NR_CURSEG_PERSIST_TYPE - type, META_CP, true); for (; type <= 
CURSEG_COLD_NODE; type++) { err = read_normal_summaries(sbi, type); if (err) return err; } /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { f2fs_err(sbi, "invalid journal entries nats %u sits %u", nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; } return 0; } static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) { struct folio *folio; unsigned char *kaddr; struct f2fs_summary *summary; struct curseg_info *seg_i; int written_size = 0; int i, j; folio = f2fs_grab_meta_folio(sbi, blkaddr++); kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; /* Step 2: write sit cache */ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA); memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { seg_i = CURSEG_I(sbi, i); for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) { if (!folio) { folio = f2fs_grab_meta_folio(sbi, blkaddr++); kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); written_size = 0; } summary = (struct f2fs_summary *)(kaddr + written_size); *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; if (written_size + SUMMARY_SIZE <= PAGE_SIZE - SUM_FOOTER_SIZE) continue; folio_mark_dirty(folio); f2fs_folio_put(folio, true); folio = NULL; } } if (folio) { folio_mark_dirty(folio); f2fs_folio_put(folio, true); } } static void write_normal_summaries(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { int i, end; if (IS_DATASEG(type)) end = type + NR_CURSEG_DATA_TYPE; else end = type + NR_CURSEG_NODE_TYPE; for (i = type; i < end; i++) write_current_sum_page(sbi, i, blkaddr + (i - type)); } void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); else write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); } void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc) { int i; if (type == NAT_JOURNAL) { for (i = 0; i < nats_in_cursum(journal); i++) { if (le32_to_cpu(nid_in_journal(journal, i)) == val) return i; } if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL)) return update_nats_in_cursum(journal, 1); } else if (type == SIT_JOURNAL) { for (i = 0; i < sits_in_cursum(journal); i++) if (le32_to_cpu(segno_in_journal(journal, i)) == val) return i; if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL)) return update_sits_in_cursum(journal, 1); } return -1; } static struct folio *get_current_sit_folio(struct f2fs_sb_info *sbi, unsigned int segno) { return f2fs_get_meta_folio(sbi, current_sit_addr(sbi, segno)); } static struct folio *get_next_sit_folio(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); struct folio *folio; pgoff_t src_off, dst_off; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); folio = f2fs_grab_meta_folio(sbi, dst_off); seg_info_to_sit_folio(sbi, folio, start); folio_mark_dirty(folio); set_to_next_sit(sit_i, start); return folio; } static struct sit_entry_set *grab_sit_entry_set(void) { struct 
sit_entry_set *ses = f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS, true, NULL); ses->entry_cnt = 0; INIT_LIST_HEAD(&ses->set_list); return ses; } static void release_sit_entry_set(struct sit_entry_set *ses) { list_del(&ses->set_list); kmem_cache_free(sit_entry_set_slab, ses); } static void adjust_sit_entry_set(struct sit_entry_set *ses, struct list_head *head) { struct sit_entry_set *next = ses; if (list_is_last(&ses->set_list, head)) return; list_for_each_entry_continue(next, head, set_list) if (ses->entry_cnt <= next->entry_cnt) { list_move_tail(&ses->set_list, &next->set_list); return; } list_move_tail(&ses->set_list, head); } static void add_sit_entry(unsigned int segno, struct list_head *head) { struct sit_entry_set *ses; unsigned int start_segno = START_SEGNO(segno); list_for_each_entry(ses, head, set_list) { if (ses->start_segno == start_segno) { ses->entry_cnt++; adjust_sit_entry_set(ses, head); return; } } ses = grab_sit_entry_set(); ses->start_segno = start_segno; ses->entry_cnt++; list_add(&ses->set_list, head); } static void add_sits_in_set(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); struct list_head *set_list = &sm_info->sit_entry_set; unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; unsigned int segno; for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) add_sit_entry(segno, set_list); } static void remove_sits_in_journal(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_journal *journal = curseg->journal; int i; down_write(&curseg->journal_rwsem); for (i = 0; i < sits_in_cursum(journal); i++) { unsigned int segno; bool dirtied; segno = le32_to_cpu(segno_in_journal(journal, i)); dirtied = __mark_sit_entry_dirty(sbi, segno); if (!dirtied) add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); } update_sits_in_cursum(journal, -i); up_write(&curseg->journal_rwsem); } /* * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) goto out; /* * temporarily add and account the sit entries of the dirty bitmap in * the sit entry set */ add_sits_in_set(sbi); /* * if there is not enough space in the journal to store dirty sit * entries, remove all entries from the journal and add and account * them in the sit entry set. */ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || !to_journal) remove_sits_in_journal(sbi); /* * there are two steps to flush sit entries: * #1, flush sit entries to journal in current cold data summary block. * #2, flush sit entries to sit page.
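 * Step #1 is only taken while the remaining journal space can hold a whole * entry set; once __has_cursum_space() fails (or while resizing), the * remaining sets fall back to step #2.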
*/ list_for_each_entry_safe(ses, tmp, head, set_list) { struct folio *folio = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, (unsigned long)MAIN_SEGS(sbi)); unsigned int segno = start_segno; if (to_journal && !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL)) to_journal = false; if (to_journal) { down_write(&curseg->journal_rwsem); } else { folio = get_next_sit_folio(sbi, start_segno); raw_sit = folio_address(folio); } /* flush dirty sit entries in region of current sit set */ for_each_set_bit_from(segno, bitmap, end) { int offset, sit_offset; se = get_seg_entry(sbi, segno); #ifdef CONFIG_F2FS_CHECK_FS if (memcmp(se->cur_valid_map, se->cur_valid_map_mir, SIT_VBLOCK_MAP_SIZE)) f2fs_bug_on(sbi, 1); #endif /* add discard candidates */ if (!(cpc->reason & CP_DISCARD)) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc, false); } if (to_journal) { offset = f2fs_lookup_journal_in_cursum(journal, SIT_JOURNAL, segno, 1); f2fs_bug_on(sbi, offset < 0); segno_in_journal(journal, offset) = cpu_to_le32(segno); seg_info_to_raw_sit(se, &sit_in_journal(journal, offset)); check_block_count(sbi, segno, &sit_in_journal(journal, offset)); } else { sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); check_block_count(sbi, segno, &raw_sit->entries[sit_offset]); } /* update ckpt_valid_block */ if (__is_large_section(sbi)) { set_ckpt_valid_blocks(sbi, segno); sanity_check_valid_blocks(sbi, segno); } __clear_bit(segno, bitmap); sit_i->dirty_sentries--; ses->entry_cnt--; } if (to_journal) up_write(&curseg->journal_rwsem); else f2fs_folio_put(folio, true); f2fs_bug_on(sbi, ses->entry_cnt); release_sit_entry_set(ses); } f2fs_bug_on(sbi, !list_empty(head)); f2fs_bug_on(sbi, sit_i->dirty_sentries); out: if (cpc->reason & CP_DISCARD) { __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) add_discard_addrs(sbi, cpc, false); cpc->trim_start = trim_start; } up_write(&sit_i->sentry_lock); set_prefree_as_free_segments(sbi); } static int build_sit_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; char *src_bitmap, *bitmap; unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size; unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 
1 : 0; /* allocate memory for SIT information */ sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); if (!sit_i) return -ENOMEM; SM_I(sbi)->sit_info = sit_i; sit_i->sentries = f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry), MAIN_SEGS(sbi)), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map); #else bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map); #endif sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!sit_i->bitmap) return -ENOMEM; bitmap = sit_i->bitmap; for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map = bitmap; bitmap += SIT_VBLOCK_MAP_SIZE; sit_i->sentries[start].ckpt_valid_map = bitmap; bitmap += SIT_VBLOCK_MAP_SIZE; #ifdef CONFIG_F2FS_CHECK_FS sit_i->sentries[start].cur_valid_map_mir = bitmap; bitmap += SIT_VBLOCK_MAP_SIZE; #endif if (discard_map) { sit_i->sentries[start].discard_map = bitmap; bitmap += SIT_VBLOCK_MAP_SIZE; } } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->tmp_map) return -ENOMEM; if (__is_large_section(sbi)) { sit_i->sec_entries = f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), MAIN_SECS(sbi)), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; } /* get information related to SIT */ sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1; /* set up SIT bitmap from checkpoint pack */ sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); if (!sit_i->sit_bitmap) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS sit_i->sit_bitmap_mir = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL); if (!sit_i->sit_bitmap_mir) return -ENOMEM; sit_i->invalid_segmap = f2fs_kvzalloc(sbi, main_bitmap_size, GFP_KERNEL); if (!sit_i->invalid_segmap) return -ENOMEM; #endif sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs); sit_i->written_valid_blocks = 0; sit_i->bitmap_size = sit_bitmap_size; sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); sit_i->mounted_time = ktime_get_boottime_seconds(); init_rwsem(&sit_i->sentry_lock); return 0; } static int build_free_segmap(struct f2fs_sb_info *sbi) { struct free_segmap_info *free_i; unsigned int bitmap_size, sec_bitmap_size; /* allocate memory for free segmap information */ free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); if (!free_i) return -ENOMEM; SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; /* set all segments as dirty temporarily */ memset(free_i->free_segmap, 0xff, bitmap_size); memset(free_i->free_secmap, 0xff, sec_bitmap_size); /* init free segmap information */ free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; spin_lock_init(&free_i->segmap_lock); return 0; } static int
build_curseg(struct f2fs_sb_info *sbi) { struct curseg_info *array; int i; array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)), GFP_KERNEL); if (!array) return -ENOMEM; SM_I(sbi)->curseg_array = array; for (i = 0; i < NO_CHECK_TYPE; i++) { mutex_init(&array[i].curseg_mutex); array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; init_rwsem(&array[i].journal_rwsem); array[i].journal = f2fs_kzalloc(sbi, sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; array[i].seg_type = log_type_to_seg_type(i); reset_curseg_fields(&array[i]); } return restore_curseg_summaries(sbi); } static int build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_journal *journal = curseg->journal; struct seg_entry *se; struct f2fs_sit_entry sit; int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; block_t sit_valid_blocks[2] = {0, 0}; do { readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, META_SIT, true); start = start_blk * sit_i->sents_per_block; end = (start_blk + readed) * sit_i->sents_per_block; for (; start < end && start < MAIN_SEGS(sbi); start++) { struct f2fs_sit_block *sit_blk; struct folio *folio; se = &sit_i->sentries[start]; folio = get_current_sit_folio(sbi, start); if (IS_ERR(folio)) return PTR_ERR(folio); sit_blk = folio_address(folio); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_folio_put(folio, true); err = check_block_count(sbi, start, &sit); if (err) return err; seg_info_from_raw_sit(se, &sit); if (se->type >= NR_PERSISTENT_LOG) { f2fs_err(sbi, "Invalid segment type: %u, segno: %u", se->type, start); f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); return -EFSCORRUPTED; } sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (!f2fs_block_unit_discard(sbi)) goto init_discard_map_done; /* build discard map only one time */ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); goto init_discard_map_done; } memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); sbi->discard_blks += BLKS_PER_SEG(sbi) - se->valid_blocks; init_discard_map_done: if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } start_blk += readed; } while (start_blk < sit_blk_cnt); down_read(&curseg->journal_rwsem); for (i = 0; i < sits_in_cursum(journal); i++) { unsigned int old_valid_blocks; start = le32_to_cpu(segno_in_journal(journal, i)); if (start >= MAIN_SEGS(sbi)) { f2fs_err(sbi, "Wrong journal entry on segno %u", start); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL); break; } se = &sit_i->sentries[start]; sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks; err = check_block_count(sbi, start, &sit); if (err) break; seg_info_from_raw_sit(se, &sit); if (se->type >= NR_PERSISTENT_LOG) { f2fs_err(sbi, "Invalid segment type: %u, segno: %u", se->type, start); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); break; } sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; if (f2fs_block_unit_discard(sbi)) { if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); } else { memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); sbi->discard_blks += old_valid_blocks; sbi->discard_blks -= 
se->valid_blocks; } } if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; get_sec_entry(sbi, start)->valid_blocks -= old_valid_blocks; } } up_read(&curseg->journal_rwsem); /* update ckpt_valid_block */ if (__is_large_section(sbi)) { unsigned int segno; for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { set_ckpt_valid_blocks(sbi, segno); sanity_check_valid_blocks(sbi, segno); } } if (err) return err; if (sit_valid_blocks[NODE] != valid_node_count(sbi)) { f2fs_err(sbi, "SIT is corrupted node# %u vs %u", sit_valid_blocks[NODE], valid_node_count(sbi)); f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT); return -EFSCORRUPTED; } if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] > valid_user_blocks(sbi)) { f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u", sit_valid_blocks[DATA], sit_valid_blocks[NODE], valid_user_blocks(sbi)); f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT); return -EFSCORRUPTED; } return 0; } static void init_free_segmap(struct f2fs_sb_info *sbi) { unsigned int start; int type; struct seg_entry *sentry; for (start = 0; start < MAIN_SEGS(sbi); start++) { if (f2fs_usable_blks_in_seg(sbi, start) == 0) continue; sentry = get_seg_entry(sbi, start); if (!sentry->valid_blocks) __set_free(sbi, start); else SIT_I(sbi)->written_valid_blocks += sentry->valid_blocks; } /* set the current segments as in-use */ for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) { struct curseg_info *curseg_t = CURSEG_I(sbi, type); __set_test_and_inuse(sbi, curseg_t->segno); } } static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno = 0, offset = 0, secno; block_t valid_blocks, usable_blks_in_seg; while (1) { /* find dirty segment based on free segmap */ segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, false); usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno); if (valid_blocks == usable_blks_in_seg || !valid_blocks) continue; if (valid_blocks > usable_blks_in_seg) { f2fs_bug_on(sbi, 1); continue; } mutex_lock(&dirty_i->seglist_lock); __locate_dirty_segment(sbi, segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); } if (!__is_large_section(sbi)) return; mutex_lock(&dirty_i->seglist_lock); for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi)) continue; if (IS_CURSEC(sbi, secno)) continue; set_bit(secno, dirty_i->dirty_secmap); } mutex_unlock(&dirty_i->seglist_lock); } static int init_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->pinned_secmap) return -ENOMEM; dirty_i->pinned_secmap_cnt = 0; dirty_i->enable_pin_section = true; return 0; } static int build_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i; unsigned int bitmap_size, i; /* allocate memory for dirty segments list information */ dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), GFP_KERNEL); if (!dirty_i) return -ENOMEM; SM_I(sbi)->dirty_info
= dirty_i; mutex_init(&dirty_i->seglist_lock); bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } if (__is_large_section(sbi)) { bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_secmap) return -ENOMEM; } init_dirty_segmap(sbi); return init_victim_secmap(sbi); } static int sanity_check_curseg(struct f2fs_sb_info *sbi) { int i; /* * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; * In LFS curseg, all blkaddr after .next_blkoff should be unused. */ for (i = 0; i < NR_PERSISTENT_LOG; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); struct seg_entry *se = get_seg_entry(sbi, curseg->segno); unsigned int blkofs = curseg->next_blkoff; if (f2fs_sb_has_readonly(sbi) && i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE) continue; sanity_check_seg_type(sbi, curseg->seg_type); if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { f2fs_err(sbi, "Current segment has invalid alloc_type:%d", curseg->alloc_type); f2fs_handle_error(sbi, ERROR_INVALID_CURSEG); return -EFSCORRUPTED; } if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out; if (curseg->alloc_type == SSR) continue; for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) { if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: f2fs_err(sbi, "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", i, curseg->segno, curseg->alloc_type, curseg->next_blkoff, blkofs); f2fs_handle_error(sbi, ERROR_INVALID_CURSEG); return -EFSCORRUPTED; } } return 0; } #ifdef CONFIG_BLK_DEV_ZONED static int check_zone_write_pointer(struct f2fs_sb_info *sbi, struct f2fs_dev_info *fdev, struct blk_zone *zone) { unsigned int zone_segno; block_t zone_block, valid_block_cnt; unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; int ret; unsigned int nofs_flags; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); zone_segno = GET_SEGNO(sbi, zone_block); /* * Skip check of zones cursegs point to, since * fix_curseg_write_pointer() checks them. */ if (zone_segno >= MAIN_SEGS(sbi)) return 0; /* * Get # of valid blocks in the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); return 0; } if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) || (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL)) return 0; if (!valid_block_cnt) { f2fs_notice(sbi, "Zone without valid block has non-zero write " "pointer. Reset the write pointer: cond[%s]", blk_zone_cond_str(zone->cond)); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); return ret; } /* * If there are valid blocks and the write pointer doesn't match * with them, we need to report the inconsistency and fill * the zone till the end to close the zone. This inconsistency * does not cause a write error because the zone will not be * selected for write operations until it gets discarded.
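 * Closing is first attempted with REQ_OP_ZONE_FINISH; if the device does not * support it, the region from the write pointer to the zone end is zeroed * out instead (see the -EOPNOTSUPP fallback below).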
*/ f2fs_notice(sbi, "Valid blocks are not aligned with write " "pointer: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, zone->start, zone->len); memalloc_nofs_restore(nofs_flags); if (ret == -EOPNOTSUPP) { ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, zone->len - (zone->wp - zone->start), GFP_NOFS, 0); if (ret) f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", fdev->path, ret); } else if (ret) { f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)", fdev->path, ret); } return ret; } static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, block_t zone_blkaddr) { int i; for (i = 0; i < sbi->s_ndevs; i++) { if (!bdev_is_zoned(FDEV(i).bdev)) continue; if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr && zone_blkaddr <= FDEV(i).end_blk)) return &FDEV(i); } return NULL; } static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, void *data) { memcpy(data, zone, sizeof(struct blk_zone)); return 0; } static int do_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) { struct curseg_info *cs = CURSEG_I(sbi, type); struct f2fs_dev_info *zbd; struct blk_zone zone; unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; block_t cs_zone_block, wp_block; unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; sector_t zone_sector; int err; cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); zbd = get_target_zoned_dev(sbi, cs_zone_block); if (!zbd) return 0; /* report zone for the sector the curseg points to */ zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { f2fs_err(sbi, "Report zone failed: %s errno=(%d)", zbd->path, err); return err; } if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; /* * When safely unmounted in the previous mount, we could use current * segments. Otherwise, allocate new sections. */ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && wp_sector_off == 0) return 0; f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: " "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); } /* Allocate a new section if it's not new. 
*/ if (cs->next_blkoff || cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) { unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff; f2fs_allocate_new_section(sbi, type, true); f2fs_notice(sbi, "Assign new section to curseg[%d]: " "[0x%x,0x%x] -> [0x%x,0x%x]", type, old_segno, old_blkoff, cs->segno, cs->next_blkoff); } /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) return -EIO; /* check newly assigned zone */ cs_section = GET_SEC_FROM_SEG(sbi, cs->segno); cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section)); zbd = get_target_zoned_dev(sbi, cs_zone_block); if (!zbd) return 0; zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { f2fs_err(sbi, "Report zone failed: %s errno=(%d)", zbd->path, err); return err; } if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; if (zone.wp != zone.start) { f2fs_notice(sbi, "New zone for curseg[%d] is not yet discarded. " "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, zone.len >> log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); return err; } } return 0; } static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; for (i = 0; i < NR_PERSISTENT_LOG; i++) { ret = do_fix_curseg_write_pointer(sbi, i); if (ret) return ret; } return 0; } struct check_zone_write_pointer_args { struct f2fs_sb_info *sbi; struct f2fs_dev_info *fdev; }; static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct check_zone_write_pointer_args *args; args = (struct check_zone_write_pointer_args *)data; return check_zone_write_pointer(args->sbi, args->fdev, zone); } static int check_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; struct check_zone_write_pointer_args args; for (i = 0; i < sbi->s_ndevs; i++) { if (!bdev_is_zoned(FDEV(i).bdev)) continue; args.sbi = sbi; args.fdev = &FDEV(i); ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES, check_zone_write_pointer_cb, &args); if (ret < 0) return ret; } return 0; } int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { int ret; if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi)) return 0; f2fs_notice(sbi, "Checking entire write pointers"); ret = fix_curseg_write_pointer(sbi); if (!ret) ret = check_write_pointer(sbi); return ret; } /* * Return the number of usable blocks in a segment. The number of blocks * returned is always equal to the number of blocks in a segment for * segments fully contained within a sequential zone capacity or a * conventional zone. For segments partially contained in a sequential * zone capacity, the number of usable blocks up to the zone capacity * is returned. 0 is returned in all other cases. 
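 * A hypothetical example: with 2 MiB segments and a zone capacity that ends * 1 MiB into a segment, that segment reports half of BLKS_PER_SEG(sbi) as * usable, earlier segments in the section report all of their blocks, and a * segment starting at or beyond the capacity reports 0.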
*/ static inline unsigned int f2fs_usable_zone_blks_in_seg( struct f2fs_sb_info *sbi, unsigned int segno) { block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr; unsigned int secno; if (!sbi->unusable_blocks_per_sec) return BLKS_PER_SEG(sbi); secno = GET_SEC_FROM_SEG(sbi, segno); seg_start = START_BLOCK(sbi, segno); sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); /* * If segment starts before zone capacity and spans beyond * zone capacity, then usable blocks are from seg start to * zone capacity. If the segment starts after the zone capacity, * then there are no usable blocks. */ if (seg_start >= sec_cap_blkaddr) return 0; if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr) return sec_cap_blkaddr - seg_start; return BLKS_PER_SEG(sbi); } #else int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { return 0; } static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno) { return 0; } #endif unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno) { if (f2fs_sb_has_blkzoned(sbi)) return f2fs_usable_zone_blks_in_seg(sbi, segno); return BLKS_PER_SEG(sbi); } unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); return SEGS_PER_SEC(sbi); } unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); unsigned int secno = 0, start = 0; unsigned int total_valid_blocks = 0; unsigned long long mtime = 0; unsigned int i = 0; secno = GET_SEC_FROM_SEG(sbi, segno); start = GET_SEG_FROM_SEC(sbi, secno); if (!__is_large_section(sbi)) { mtime = get_seg_entry(sbi, start + i)->mtime; goto out; } for (i = 0; i < usable_segs_per_sec; i++) { /* for large section, only check the mtime of valid segments */ struct seg_entry *se = get_seg_entry(sbi, start+i); mtime += se->mtime * se->valid_blocks; total_valid_blocks += se->valid_blocks; } if (total_valid_blocks == 0) return INVALID_MTIME; mtime = div_u64(mtime, total_valid_blocks); out: if (unlikely(mtime == INVALID_MTIME)) mtime -= 1; return mtime; } /* * Update min, max modified time for cost-benefit GC algorithm */ static void init_min_max_mtime(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); unsigned int segno; down_write(&sit_i->sentry_lock); sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { unsigned long long mtime = 0; mtime = f2fs_get_section_mtime(sbi, segno); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; } sit_i->max_mtime = get_mtime(sbi, false); sit_i->dirty_max_mtime = 0; up_write(&sit_i->sentry_lock); } int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_sm_info *sm_info; int err; sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); if (!sm_info) return -ENOMEM; /* init sm info */ sbi->sm_info = sm_info; sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); sm_info->segment_count = le32_to_cpu(raw_super->segment_count); sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = 
le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; if (!f2fs_lfs_mode(sbi)) sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC); sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; sm_info->min_seq_blocks = BLKS_PER_SEG(sbi); sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); INIT_LIST_HEAD(&sm_info->sit_entry_set); init_f2fs_rwsem(&sm_info->curseg_lock); err = f2fs_create_flush_cmd_control(sbi); if (err) return err; err = create_discard_cmd_control(sbi); if (err) return err; err = build_sit_info(sbi); if (err) return err; err = build_free_segmap(sbi); if (err) return err; err = build_curseg(sbi); if (err) return err; /* reinit free segmap based on SIT */ err = build_sit_entries(sbi); if (err) return err; init_free_segmap(sbi); err = build_dirty_segmap(sbi); if (err) return err; err = sanity_check_curseg(sbi); if (err) return err; init_min_max_mtime(sbi); return 0; } static void discard_dirty_segmap(struct f2fs_sb_info *sbi, enum dirty_type dirty_type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); mutex_lock(&dirty_i->seglist_lock); kvfree(dirty_i->dirty_segmap[dirty_type]); dirty_i->nr_dirty[dirty_type] = 0; mutex_unlock(&dirty_i->seglist_lock); } static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); kvfree(dirty_i->pinned_secmap); kvfree(dirty_i->victim_secmap); } static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); int i; if (!dirty_i) return; /* discard pre-free/dirty segments list */ for (i = 0; i < NR_DIRTY_TYPE; i++) discard_dirty_segmap(sbi, i); if (__is_large_section(sbi)) { mutex_lock(&dirty_i->seglist_lock); kvfree(dirty_i->dirty_secmap); mutex_unlock(&dirty_i->seglist_lock); } destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; kfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) { struct curseg_info *array = SM_I(sbi)->curseg_array; int i; if (!array) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { kfree(array[i].sum_blk); kfree(array[i].journal); } kfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) { struct free_segmap_info *free_i = SM_I(sbi)->free_info; if (!free_i) return; SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); kfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); if (!sit_i) return; if (sit_i->sentries) kvfree(sit_i->bitmap); kfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS kvfree(sit_i->sit_bitmap_mir); kvfree(sit_i->invalid_segmap); #endif kfree(sit_i); } void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); if (!sm_info) return; f2fs_destroy_flush_cmd_control(sbi, true); destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; kfree(sm_info); } int __init f2fs_create_segment_manager_caches(void) { discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", sizeof(struct discard_entry)); 
if (!discard_entry_slab) goto fail; discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", sizeof(struct discard_cmd)); if (!discard_cmd_slab) goto destroy_discard_entry; sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destroy_discard_cmd; revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry", sizeof(struct revoke_entry)); if (!revoke_entry_slab) goto destroy_sit_entry_set; return 0; destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); destroy_discard_cmd: kmem_cache_destroy(discard_cmd_slab); destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: return -ENOMEM; } void f2fs_destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); kmem_cache_destroy(discard_cmd_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(revoke_entry_slab); }
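/*
 * The create/destroy pair above follows the usual kernel pattern for
 * module-lifetime slab caches: create each cache in turn, unwind the
 * already-created ones in reverse order through goto labels on failure,
 * and destroy them all again at module exit. A minimal sketch of the same
 * idiom using the generic kmem_cache API (the demo_* names are
 * illustrative, not part of f2fs):
 *
 *	static struct kmem_cache *demo_a_slab, *demo_b_slab;
 *
 *	static int __init demo_create_caches(void)
 *	{
 *		demo_a_slab = kmem_cache_create("demo_a",
 *				sizeof(struct demo_a), 0, 0, NULL);
 *		if (!demo_a_slab)
 *			goto fail;
 *		demo_b_slab = kmem_cache_create("demo_b",
 *				sizeof(struct demo_b), 0, 0, NULL);
 *		if (!demo_b_slab)
 *			goto destroy_a;
 *		return 0;
 *	destroy_a:
 *		kmem_cache_destroy(demo_a_slab);
 *	fail:
 *		return -ENOMEM;
 *	}
 *
 *	static void demo_destroy_caches(void)
 *	{
 *		kmem_cache_destroy(demo_b_slab);
 *		kmem_cache_destroy(demo_a_slab);
 *	}
 */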
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_TYPES_H
#define _BCACHEFS_BKEY_TYPES_H

#include "bcachefs_format.h"

/*
 * bkey_i - bkey with inline value
 * bkey_s - bkey with split value
 * bkey_s_c - bkey with split value, const
 */

#define bkey_p_next(_k)		vstruct_next(_k)

static inline struct bkey_i *bkey_next(struct bkey_i *k)
{
	return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
}

#define bkey_val_u64s(_k)	((_k)->u64s - BKEY_U64s)

static inline size_t bkey_val_bytes(const struct bkey *k)
{
	return bkey_val_u64s(k) * sizeof(u64);
}

static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
{
	unsigned u64s = BKEY_U64s + val_u64s;

	BUG_ON(u64s > U8_MAX);
	k->u64s = u64s;
}

static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
{
	set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
}

#define bkey_val_end(_k)	((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))

#define bkey_deleted(_k)	((_k)->type == KEY_TYPE_deleted)

#define bkey_whiteout(_k)	\
	((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)

/* bkey with split value, const */
struct bkey_s_c {
	const struct bkey	*k;
	const struct bch_val	*v;
};

/* bkey with split value */
struct bkey_s {
	union {
	struct {
		struct bkey	*k;
		struct bch_val	*v;
	};
	struct bkey_s_c		s_c;
	};
};

#define bkey_s_null		((struct bkey_s)   { .k = NULL })
#define bkey_s_c_null		((struct bkey_s_c) { .k = NULL })

#define bkey_s_err(err)		((struct bkey_s)   { .k = ERR_PTR(err) })
#define bkey_s_c_err(err)	((struct bkey_s_c) { .k = ERR_PTR(err) })

static inline struct bkey_s bkey_to_s(struct bkey *k)
{
	return (struct bkey_s) { .k = k, .v = NULL };
}

static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
{
	return (struct bkey_s_c) { .k = k, .v = NULL };
}

static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
{
	return (struct bkey_s) { .k = &k->k, .v = &k->v };
}

static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
{
	return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
}

/*
 * For a given type of value (e.g. struct bch_extent), generates the types for
 * bkey + bch_extent - inline, split, split const - and also all the conversion
 * functions, which also check that the value is of the correct type.
 *
 * We use anonymous unions for upcasting - e.g. converting from e.g. a
 * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
 * functions.
 */
#define x(name, ...)							\
struct bkey_i_##name {							\
	union {								\
		struct bkey		k;				\
		struct bkey_i		k_i;				\
	};								\
	struct bch_##name	v;					\
};									\
									\
struct bkey_s_c_##name {						\
	union {								\
	struct {							\
		const struct bkey	*k;				\
		const struct bch_##name	*v;				\
	};								\
	struct bkey_s_c		s_c;					\
	};								\
};									\
									\
struct bkey_s_##name {							\
	union {								\
	struct {							\
		struct bkey		*k;				\
		struct bch_##name	*v;				\
	};								\
	struct bkey_s_c_##name	c;					\
	struct bkey_s		s;					\
	struct bkey_s_c		s_c;					\
	};								\
};									\
									\
static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k)	\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);	\
	return container_of(&k->k, struct bkey_i_##name, k);		\
}									\
									\
static inline const struct bkey_i_##name *				\
bkey_i_to_##name##_c(const struct bkey_i *k)				\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);	\
	return container_of(&k->k, struct bkey_i_##name, k);		\
}									\
									\
static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)	\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);	\
	return (struct bkey_s_##name) {					\
		.k = k.k,						\
		.v = container_of(k.v, struct bch_##name, v),		\
	};								\
}									\
									\
static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);	\
	return (struct bkey_s_c_##name) {				\
		.k = k.k,						\
		.v = container_of(k.v, struct bch_##name, v),		\
	};								\
}									\
									\
static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
{									\
	return (struct bkey_s_##name) {					\
		.k = &k->k,						\
		.v = &k->v,						\
	};								\
}									\
									\
static inline struct bkey_s_c_##name					\
name##_i_to_s_c(const struct bkey_i_##name *k)				\
{									\
	return (struct bkey_s_c_##name) {				\
		.k = &k->k,						\
		.v = &k->v,						\
	};								\
}									\
									\
static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)	\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);	\
	return (struct bkey_s_##name) {					\
		.k = &k->k,						\
		.v = container_of(&k->v, struct bch_##name, v),		\
	};								\
}									\
									\
static inline struct bkey_s_c_##name					\
bkey_i_to_s_c_##name(const struct bkey_i *k)				\
{									\
	EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);	\
	return (struct bkey_s_c_##name) {				\
		.k = &k->k,						\
		.v = container_of(&k->v, struct bch_##name, v),		\
	};								\
}									\
									\
static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
{									\
	struct bkey_i_##name *k =					\
		container_of(&_k->k, struct bkey_i_##name, k);		\
									\
	bkey_init(&k->k);						\
	memset(&k->v, 0, sizeof(k->v));					\
	k->k.type = KEY_TYPE_##name;					\
	set_bkey_val_bytes(&k->k, sizeof(k->v));			\
									\
	return k;							\
}

BCH_BKEY_TYPES();
#undef x

enum bch_validate_flags {
	BCH_VALIDATE_write		= BIT(0),
	BCH_VALIDATE_commit		= BIT(1),
	BCH_VALIDATE_silent		= BIT(2),
};

#define BKEY_VALIDATE_CONTEXTS()	\
	x(unknown)			\
	x(superblock)			\
	x(journal)			\
	x(btree_root)			\
	x(btree_node)			\
	x(commit)

struct bkey_validate_context {
	enum {
#define x(n)	BKEY_VALIDATE_##n,
	BKEY_VALIDATE_CONTEXTS()
#undef x
	}			from:8;
	enum bch_validate_flags	flags:8;
	u8			level;
	enum btree_id		btree;
	bool			root:1;
	unsigned		journal_offset;
	u64			journal_seq;
};

#endif /* _BCACHEFS_BKEY_TYPES_H */
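/*
 * Illustrative sketch of what the x() macro above generates. Assuming
 * BCH_BKEY_TYPES() contains an entry x(cookie, ...) (as in
 * bcachefs_format.h, where struct bch_cookie holds a single __le64), the
 * expansion defines struct bkey_i_cookie plus the split/const variants and
 * the type-checked conversions, so a caller can write:
 *
 *	struct bkey_i_cookie cookie;
 *
 *	bkey_cookie_init(&cookie.k_i);	// sets k.type = KEY_TYPE_cookie
 *	cookie.v.cookie = cpu_to_le64(42);
 *
 *	// Upcasting is just a member access through the anonymous union:
 *	struct bkey_i *k = &cookie.k_i;
 *
 *	// Downcasting back is checked (EBUG_ON fires on a type mismatch):
 *	struct bkey_s_c_cookie c = bkey_i_to_s_c_cookie(k);
 */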
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#ifndef __XFS_IALLOC_H__
#define __XFS_IALLOC_H__

struct xfs_buf;
struct xfs_dinode;
struct xfs_imap;
struct xfs_mount;
struct xfs_trans;
struct xfs_btree_cur;
struct xfs_perag;

/* Move inodes in clusters of this size */
#define XFS_INODE_BIG_CLUSTER_SIZE	8192

struct xfs_icluster {
	bool		deleted;	/* record is deleted */
	xfs_ino_t	first_ino;	/* first inode number */
	uint64_t	alloc;		/* inode phys. allocation bitmap for
					 * sparse chunks */
};

/*
 * Make an inode pointer out of the buffer/offset.
 */
static inline struct xfs_dinode *
xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
{
	return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
}

struct xfs_icreate_args;

/*
 * Allocate an inode on disk. Mode is used to tell whether the new inode will
 * need space, and whether it is a directory.
 */
int xfs_dialloc(struct xfs_trans **tpp, const struct xfs_icreate_args *args,
		xfs_ino_t *new_ino);

int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag,
		xfs_ino_t ino, struct xfs_icluster *ifree);

/*
 * Return the location of the inode in imap, for mapping it into a buffer.
 */
int
xfs_imap(
	struct xfs_perag *pag,
	struct xfs_trans *tp,		/* transaction pointer */
	xfs_ino_t	ino,		/* inode to locate */
	struct xfs_imap	*imap,		/* location map structure */
	uint		flags);		/* flags for inode btree lookup */

/*
 * Log specified fields for the ag hdr (inode section)
 */
void
xfs_ialloc_log_agi(
	struct xfs_trans *tp,		/* transaction pointer */
	struct xfs_buf	*bp,		/* allocation group header buffer */
	uint32_t	fields);	/* bitmask of fields to log */

int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
		xfs_buf_flags_t flags, struct xfs_buf **agibpp);
int xfs_ialloc_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
		int flags, struct xfs_buf **agibpp);

#define XFS_IALLOC_FLAG_TRYLOCK	(1U << 0)  /* use trylock for buffer locking */

/*
 * Lookup a record by ino in the btree given by cur.
 */
int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
		xfs_lookup_t dir, int *stat);

/*
 * Get the data from the pointed-to record.
 */
int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
		xfs_inobt_rec_incore_t *rec, int *stat);
uint8_t xfs_inobt_rec_freecount(const struct xfs_inobt_rec_incore *irec);

/*
 * Inode chunk initialisation routine
 */
int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
			  struct list_head *buffer_list, int icount,
			  xfs_agnumber_t agno, xfs_agblock_t agbno,
			  xfs_agblock_t length, unsigned int gen);

union xfs_btree_rec;
void xfs_inobt_btrec_to_irec(struct xfs_mount *mp,
		const union xfs_btree_rec *rec,
		struct xfs_inobt_rec_incore *irec);
xfs_failaddr_t xfs_inobt_check_irec(struct xfs_perag *pag,
		const struct xfs_inobt_rec_incore *irec);
int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
		xfs_agblock_t bno, xfs_extlen_t len,
		enum xbtree_recpacking *outcome);
int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
		xfs_agino_t *freecount);
int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
		uint8_t count, int32_t freecount, xfs_inofree_t free,
		int *stat);

int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
int xfs_ialloc_check_shrink(struct xfs_perag *pag, struct xfs_trans *tp,
		struct xfs_buf *agibp, xfs_agblock_t new_length);

#endif	/* __XFS_IALLOC_H__ */
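/*
 * Illustrative sketch of the lookup/get_rec pairing declared above:
 * walking every inode-btree record in an AG from the lowest startino.
 * Assumes the caller already owns an inobt cursor and uses
 * xfs_btree_increment() from xfs_btree.h; error handling is elided.
 *
 *	struct xfs_inobt_rec_incore	irec;
 *	int				stat, error;
 *
 *	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
 *	while (!error && stat) {
 *		error = xfs_inobt_get_rec(cur, &irec, &stat);
 *		if (error || !stat)
 *			break;
 *		// irec.ir_startino and irec.ir_freecount are now valid
 *		error = xfs_btree_increment(cur, 0, &stat);
 *	}
 */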
/* SPDX-License-Identifier:
GPL-2.0 */ #ifndef _LINUX_FB_H #define _LINUX_FB_H #include <uapi/linux/fb.h> #define FBIO_CURSOR _IOWR('F', 0x08, struct fb_cursor_user) #include <linux/mutex.h> #include <linux/printk.h> #include <linux/refcount.h> #include <linux/types.h> #include <linux/workqueue.h> #include <asm/video.h> struct backlight_device; struct device; struct device_node; struct fb_info; struct file; struct i2c_adapter; struct inode; struct lcd_device; struct module; struct notifier_block; struct page; struct videomode; struct vm_area_struct; /* Definitions below are used in the parsed monitor specs */ #define FB_DPMS_ACTIVE_OFF 1 #define FB_DPMS_SUSPEND 2 #define FB_DPMS_STANDBY 4 #define FB_DISP_DDI 1 #define FB_DISP_ANA_700_300 2 #define FB_DISP_ANA_714_286 4 #define FB_DISP_ANA_1000_400 8 #define FB_DISP_ANA_700_000 16 #define FB_DISP_MONO 32 #define FB_DISP_RGB 64 #define FB_DISP_MULTI 128 #define FB_DISP_UNKNOWN 256 #define FB_SIGNAL_NONE 0 #define FB_SIGNAL_BLANK_BLANK 1 #define FB_SIGNAL_SEPARATE 2 #define FB_SIGNAL_COMPOSITE 4 #define FB_SIGNAL_SYNC_ON_GREEN 8 #define FB_SIGNAL_SERRATION_ON 16 #define FB_MISC_PRIM_COLOR 1 #define FB_MISC_1ST_DETAIL 2 /* First Detailed Timing is preferred */ #define FB_MISC_HDMI 4 struct fb_chroma { __u32 redx; /* in fraction of 1024 */ __u32 greenx; __u32 bluex; __u32 whitex; __u32 redy; __u32 greeny; __u32 bluey; __u32 whitey; }; struct fb_monspecs { struct fb_chroma chroma; struct fb_videomode *modedb; /* mode database */ __u8 manufacturer[4]; /* Manufacturer */ __u8 monitor[14]; /* Monitor String */ __u8 serial_no[14]; /* Serial Number */ __u8 ascii[14]; /* ? */ __u32 modedb_len; /* mode database length */ __u32 model; /* Monitor Model */ __u32 serial; /* Serial Number - Integer */ __u32 year; /* Year manufactured */ __u32 week; /* Week Manufactured */ __u32 hfmin; /* hfreq lower limit (Hz) */ __u32 hfmax; /* hfreq upper limit (Hz) */ __u32 dclkmin; /* pixelclock lower limit (Hz) */ __u32 dclkmax; /* pixelclock upper limit (Hz) */ __u16 input; /* display type - see FB_DISP_* */ __u16 dpms; /* DPMS support - see FB_DPMS_ */ __u16 signal; /* Signal Type - see FB_SIGNAL_* */ __u16 vfmin; /* vfreq lower limit (Hz) */ __u16 vfmax; /* vfreq upper limit (Hz) */ __u16 gamma; /* Gamma - in fractions of 100 */ __u16 gtf : 1; /* supports GTF */ __u16 misc; /* Misc flags - see FB_MISC_* */ __u8 version; /* EDID version... 
*/ __u8 revision; /* ...and revision */ __u8 max_x; /* Maximum horizontal size (cm) */ __u8 max_y; /* Maximum vertical size (cm) */ }; struct fb_cmap_user { __u32 start; /* First entry */ __u32 len; /* Number of entries */ __u16 __user *red; /* Red values */ __u16 __user *green; __u16 __user *blue; __u16 __user *transp; /* transparency, can be NULL */ }; struct fb_image_user { __u32 dx; /* Where to place image */ __u32 dy; __u32 width; /* Size of image */ __u32 height; __u32 fg_color; /* Only used when a mono bitmap */ __u32 bg_color; __u8 depth; /* Depth of the image */ const char __user *data; /* Pointer to image data */ struct fb_cmap_user cmap; /* color map info */ }; struct fb_cursor_user { __u16 set; /* what to set */ __u16 enable; /* cursor on/off */ __u16 rop; /* bitop operation */ const char __user *mask; /* cursor mask bits */ struct fbcurpos hot; /* cursor hot spot */ struct fb_image_user image; /* Cursor image */ }; /* * Register/unregister for framebuffer events */ #ifdef CONFIG_GUMSTIX_AM200EPD /* only used by mach-pxa/am200epd.c */ #define FB_EVENT_FB_REGISTERED 0x05 #define FB_EVENT_FB_UNREGISTERED 0x06 #endif struct fb_event { struct fb_info *info; void *data; }; /* Enough for the VT console needs, see its max_font_width/height */ #define FB_MAX_BLIT_WIDTH 64 #define FB_MAX_BLIT_HEIGHT 128 struct fb_blit_caps { DECLARE_BITMAP(x, FB_MAX_BLIT_WIDTH); DECLARE_BITMAP(y, FB_MAX_BLIT_HEIGHT); u32 len; u32 flags; }; #ifdef CONFIG_FB_NOTIFY extern int fb_register_client(struct notifier_block *nb); extern int fb_unregister_client(struct notifier_block *nb); extern int fb_notifier_call_chain(unsigned long val, void *v); #else static inline int fb_register_client(struct notifier_block *nb) { return 0; }; static inline int fb_unregister_client(struct notifier_block *nb) { return 0; }; static inline int fb_notifier_call_chain(unsigned long val, void *v) { return 0; }; #endif /* * Pixmap structure definition * * The purpose of this structure is to translate data * from the hardware independent format of fbdev to what * format the hardware needs. 
*/ #define FB_PIXMAP_DEFAULT 1 /* used internally by fbcon */ #define FB_PIXMAP_SYSTEM 2 /* memory is in system RAM */ #define FB_PIXMAP_IO 4 /* memory is iomapped */ #define FB_PIXMAP_SYNC 256 /* set if GPU can DMA */ struct fb_pixmap { u8 *addr; /* pointer to memory */ u32 size; /* size of buffer in bytes */ u32 offset; /* current offset to buffer */ u32 buf_align; /* byte alignment of each bitmap */ u32 scan_align; /* alignment per scanline */ u32 access_align; /* alignment per read/write (bits) */ u32 flags; /* see FB_PIXMAP_* */ /* supported bit block dimensions */ /* Format: test_bit(width - 1, blit_x) */ /* test_bit(height - 1, blit_y) */ /* if zero, will be set to full (all) */ DECLARE_BITMAP(blit_x, FB_MAX_BLIT_WIDTH); DECLARE_BITMAP(blit_y, FB_MAX_BLIT_HEIGHT); /* access methods */ void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size); void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size); }; #ifdef CONFIG_FB_DEFERRED_IO struct fb_deferred_io_pageref { struct page *page; unsigned long offset; /* private */ struct list_head list; }; struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; bool sort_pagereflist; /* sort pagelist by offset */ int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif /* * Frame buffer operations * * LOCKING NOTE: those functions must _ALL_ be called with the console * semaphore held, this is the only suitable locking mechanism we have * in 2.6. Some may be called at interrupt time at this point though. * * The exception to this is the debug related hooks. Putting the fb * into a debug state (e.g. flipping to the kernel console) and restoring * it must be done in a lock-free manner, so low level drivers should * keep track of the initial console (if applicable) and may need to * perform direct, unlocked hardware writes in these hooks. 
*/ struct fb_ops { /* open/release and usage marking */ struct module *owner; int (*fb_open)(struct fb_info *info, int user); int (*fb_release)(struct fb_info *info, int user); /* For framebuffers with strange non linear layouts or that do not * work with normal memory mapped access */ ssize_t (*fb_read)(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); ssize_t (*fb_write)(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); /* checks var and eventually tweaks it to something supported, * DO NOT MODIFY PAR */ int (*fb_check_var)(struct fb_var_screeninfo *var, struct fb_info *info); /* set the video mode according to info->var */ int (*fb_set_par)(struct fb_info *info); /* set color register */ int (*fb_setcolreg)(unsigned regno, unsigned red, unsigned green, unsigned blue, unsigned transp, struct fb_info *info); /* set color registers in batch */ int (*fb_setcmap)(struct fb_cmap *cmap, struct fb_info *info); /* blank display */ int (*fb_blank)(int blank, struct fb_info *info); /* pan display */ int (*fb_pan_display)(struct fb_var_screeninfo *var, struct fb_info *info); /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); /* Copy data from area to another */ void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); /* Draws a image to the display */ void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); /* Draws cursor */ int (*fb_cursor) (struct fb_info *info, struct fb_cursor *cursor); /* wait for blit idle, optional */ int (*fb_sync)(struct fb_info *info); /* perform fb specific ioctl (optional) */ int (*fb_ioctl)(struct fb_info *info, unsigned int cmd, unsigned long arg); /* Handle 32bit compat ioctl (optional) */ int (*fb_compat_ioctl)(struct fb_info *info, unsigned cmd, unsigned long arg); /* perform fb specific mmap */ int (*fb_mmap)(struct fb_info *info, struct vm_area_struct *vma); /* get capability given var */ void (*fb_get_caps)(struct fb_info *info, struct fb_blit_caps *caps, struct fb_var_screeninfo *var); /* teardown any resources to do with this framebuffer */ void (*fb_destroy)(struct fb_info *info); /* called at KDB enter and leave time to prepare the console */ int (*fb_debug_enter)(struct fb_info *info); int (*fb_debug_leave)(struct fb_info *info); }; #ifdef CONFIG_FB_TILEBLITTING #define FB_TILE_CURSOR_NONE 0 #define FB_TILE_CURSOR_UNDERLINE 1 #define FB_TILE_CURSOR_LOWER_THIRD 2 #define FB_TILE_CURSOR_LOWER_HALF 3 #define FB_TILE_CURSOR_TWO_THIRDS 4 #define FB_TILE_CURSOR_BLOCK 5 struct fb_tilemap { __u32 width; /* width of each tile in pixels */ __u32 height; /* height of each tile in scanlines */ __u32 depth; /* color depth of each tile */ __u32 length; /* number of tiles in the map */ const __u8 *data; /* actual tile map: a bitmap array, packed to the nearest byte */ }; struct fb_tilerect { __u32 sx; /* origin in the x-axis */ __u32 sy; /* origin in the y-axis */ __u32 width; /* number of tiles in the x-axis */ __u32 height; /* number of tiles in the y-axis */ __u32 index; /* what tile to use: index to tile map */ __u32 fg; /* foreground color */ __u32 bg; /* background color */ __u32 rop; /* raster operation */ }; struct fb_tilearea { __u32 sx; /* source origin in the x-axis */ __u32 sy; /* source origin in the y-axis */ __u32 dx; /* destination origin in the x-axis */ __u32 dy; /* destination origin in the y-axis */ __u32 width; /* number of tiles in the x-axis */ __u32 height; /* number of tiles in the y-axis */ }; struct fb_tileblit { 
__u32 sx; /* origin in the x-axis */ __u32 sy; /* origin in the y-axis */ __u32 width; /* number of tiles in the x-axis */ __u32 height; /* number of tiles in the y-axis */ __u32 fg; /* foreground color */ __u32 bg; /* background color */ __u32 length; /* number of tiles to draw */ __u32 *indices; /* array of indices to tile map */ }; struct fb_tilecursor { __u32 sx; /* cursor position in the x-axis */ __u32 sy; /* cursor position in the y-axis */ __u32 mode; /* 0 = erase, 1 = draw */ __u32 shape; /* see FB_TILE_CURSOR_* */ __u32 fg; /* foreground color */ __u32 bg; /* background color */ }; struct fb_tile_ops { /* set tile characteristics */ void (*fb_settile)(struct fb_info *info, struct fb_tilemap *map); /* all dimensions from hereon are in terms of tiles */ /* move a rectangular region of tiles from one area to another*/ void (*fb_tilecopy)(struct fb_info *info, struct fb_tilearea *area); /* fill a rectangular region with a tile */ void (*fb_tilefill)(struct fb_info *info, struct fb_tilerect *rect); /* copy an array of tiles */ void (*fb_tileblit)(struct fb_info *info, struct fb_tileblit *blit); /* cursor */ void (*fb_tilecursor)(struct fb_info *info, struct fb_tilecursor *cursor); /* get maximum length of the tile map */ int (*fb_get_tilemax)(struct fb_info *info); }; #endif /* CONFIG_FB_TILEBLITTING */ /* FBINFO_* = fb_info.flags bit flags */ #define FBINFO_HWACCEL_DISABLED 0x0002 /* When FBINFO_HWACCEL_DISABLED is set: * Hardware acceleration is turned off. Software implementations * of required functions (copyarea(), fillrect(), and imageblit()) * takes over; acceleration engine should be in a quiescent state */ /* hints */ #define FBINFO_VIRTFB 0x0004 /* FB is System RAM, not device. */ #define FBINFO_PARTIAL_PAN_OK 0x0040 /* otw use pan only for double-buffering */ #define FBINFO_READS_FAST 0x0080 /* soft-copy faster than rendering */ /* hardware supported ops */ /* semantics: when a bit is set, it indicates that the operation is * accelerated by hardware. * required functions will still work even if the bit is not set. * optional functions may not even exist if the flag bit is not set. */ #define FBINFO_HWACCEL_NONE 0x0000 #define FBINFO_HWACCEL_COPYAREA 0x0100 /* required */ #define FBINFO_HWACCEL_FILLRECT 0x0200 /* required */ #define FBINFO_HWACCEL_IMAGEBLIT 0x0400 /* required */ #define FBINFO_HWACCEL_ROTATE 0x0800 /* optional */ #define FBINFO_HWACCEL_XPAN 0x1000 /* optional */ #define FBINFO_HWACCEL_YPAN 0x2000 /* optional */ #define FBINFO_HWACCEL_YWRAP 0x4000 /* optional */ #define FBINFO_MISC_TILEBLITTING 0x20000 /* use tile blitting */ /* A driver may set this flag to indicate that it does want a set_par to be * called every time when fbcon_switch is executed. The advantage is that with * this flag set you can really be sure that set_par is always called before * any of the functions dependent on the correct hardware state or altering * that state, even if you are using some broken X releases. The disadvantage * is that it introduces unwanted delays to every console switch if set_par * is slow. It is a good idea to try this flag in the drivers initialization * code whenever there is a bug report related to switching between X and the * framebuffer console. */ #define FBINFO_MISC_ALWAYS_SETPAR 0x40000 /* * Host and GPU endianness differ. */ #define FBINFO_FOREIGN_ENDIAN 0x100000 /* * Big endian math. This is the same flags as above, but with different * meaning, it is set by the fb subsystem depending FOREIGN_ENDIAN flag * and host endianness. 
Drivers should not use this flag. */ #define FBINFO_BE_MATH 0x100000 /* * Hide smem_start in the FBIOGET_FSCREENINFO IOCTL. This is used by modern DRM * drivers to stop userspace from trying to share buffers behind the kernel's * back. Instead dma-buf based buffer sharing should be used. */ #define FBINFO_HIDE_SMEM_START 0x200000 struct fb_info { refcount_t count; int node; int flags; /* * -1 by default, set to a FB_ROTATE_* value by the driver, if it knows * a lcd is not mounted upright and fbcon should rotate to compensate. */ int fbcon_rotate_hint; struct mutex lock; /* Lock for open/release/ioctl funcs */ struct mutex mm_lock; /* Lock for fb_mmap and smem_* fields */ struct fb_var_screeninfo var; /* Current var */ struct fb_fix_screeninfo fix; /* Current fix */ struct fb_monspecs monspecs; /* Current Monitor specs */ struct fb_pixmap pixmap; /* Image hardware mapper */ struct fb_pixmap sprite; /* Cursor hardware mapper */ struct fb_cmap cmap; /* Current cmap */ struct list_head modelist; /* mode list */ struct fb_videomode *mode; /* current mode */ int blank; /* current blanking; see FB_BLANK_ constants */ #if IS_ENABLED(CONFIG_FB_BACKLIGHT) /* assigned backlight device */ /* set before framebuffer registration, remove after unregister */ struct backlight_device *bl_dev; /* Backlight level curve */ struct mutex bl_curve_mutex; u8 bl_curve[FB_BACKLIGHT_LEVELS]; #endif /* * Assigned LCD device; set before framebuffer * registration, remove after unregister */ struct lcd_device *lcd_dev; #ifdef CONFIG_FB_DEFERRED_IO struct delayed_work deferred_work; unsigned long npagerefs; struct fb_deferred_io_pageref *pagerefs; struct fb_deferred_io *fbdefio; #endif const struct fb_ops *fbops; struct device *device; /* This is the parent */ #if defined(CONFIG_FB_DEVICE) struct device *dev; /* This is this fb device */ #endif int class_flag; /* private sysfs flags */ #ifdef CONFIG_FB_TILEBLITTING struct fb_tile_ops *tileops; /* Tile Blitting */ #endif union { char __iomem *screen_base; /* Virtual address */ char *screen_buffer; }; unsigned long screen_size; /* Amount of ioremapped VRAM or 0 */ void *pseudo_palette; /* Fake palette of 16 colors */ #define FBINFO_STATE_RUNNING 0 #define FBINFO_STATE_SUSPENDED 1 u32 state; /* Hardware state i.e suspend */ void *fbcon_par; /* fbcon use-only private area */ /* From here on everything is device dependent */ void *par; bool skip_vt_switch; /* no VT switch on suspend/resume required */ bool skip_panic; /* Do not write to the fb after a panic */ }; /* This will go away * fbset currently hacks in FB_ACCELF_TEXT into var.accel_flags * when it wants to turn the acceleration engine on. This is * really a separate operation, and should be modified via sysfs. * But for now, we leave it broken with the following define */ #define STUPID_ACCELF_TEXT_SHIT #define FB_LEFT_POS(p, bpp) (fb_be_math(p) ? (32 - (bpp)) : 0) #define FB_SHIFT_HIGH(p, val, bits) (fb_be_math(p) ? (val) >> (bits) : \ (val) << (bits)) #define FB_SHIFT_LOW(p, val, bits) (fb_be_math(p) ? 
(val) << (bits) : \ (val) >> (bits)) /* * `Generic' versions of the frame buffer device operations */ extern int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_blank(struct fb_info *info, int blank); /* * Helpers for framebuffers in I/O memory */ extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); extern void cfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); extern void cfb_imageblit(struct fb_info *info, const struct fb_image *image); extern ssize_t fb_io_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); int fb_io_mmap(struct fb_info *info, struct vm_area_struct *vma); #define __FB_DEFAULT_IOMEM_OPS_RDWR \ .fb_read = fb_io_read, \ .fb_write = fb_io_write #define __FB_DEFAULT_IOMEM_OPS_DRAW \ .fb_fillrect = cfb_fillrect, \ .fb_copyarea = cfb_copyarea, \ .fb_imageblit = cfb_imageblit #define __FB_DEFAULT_IOMEM_OPS_MMAP \ .fb_mmap = fb_io_mmap #define FB_DEFAULT_IOMEM_OPS \ __FB_DEFAULT_IOMEM_OPS_RDWR, \ __FB_DEFAULT_IOMEM_OPS_DRAW, \ __FB_DEFAULT_IOMEM_OPS_MMAP /* * Helpers for framebuffers in system memory */ extern void sys_fillrect(struct fb_info *info, const struct fb_fillrect *rect); extern void sys_copyarea(struct fb_info *info, const struct fb_copyarea *area); extern void sys_imageblit(struct fb_info *info, const struct fb_image *image); extern ssize_t fb_sys_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos); extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos); #define __FB_DEFAULT_SYSMEM_OPS_RDWR \ .fb_read = fb_sys_read, \ .fb_write = fb_sys_write #define __FB_DEFAULT_SYSMEM_OPS_DRAW \ .fb_fillrect = sys_fillrect, \ .fb_copyarea = sys_copyarea, \ .fb_imageblit = sys_imageblit /* * Helpers for framebuffers in DMA-able memory */ #define __FB_DEFAULT_DMAMEM_OPS_RDWR \ .fb_read = fb_sys_read, \ .fb_write = fb_sys_write #define __FB_DEFAULT_DMAMEM_OPS_DRAW \ .fb_fillrect = sys_fillrect, \ .fb_copyarea = sys_copyarea, \ .fb_imageblit = sys_imageblit /* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height); extern void fb_set_suspend(struct fb_info *info, int state); extern int fb_get_color_depth(struct fb_var_screeninfo *var, struct fb_fix_screeninfo *fix); extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); } static inline void unlock_fb_info(struct fb_info *info) { mutex_unlock(&info->lock); } static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height) { u32 i, j; d_pitch -= s_pitch; for (i = height; i--; ) { /* s_pitch is a few bytes at the most, memcpy is suboptimal */ for (j = 0; j < s_pitch; j++) *dst++ = *src++; dst += d_pitch; } } /* fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, 
struct vm_area_struct *vma); extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); extern void fb_deferred_io_release(struct fb_info *info); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); /* * Generate callbacks for deferred I/O */ #define __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, __mode) \ static ssize_t __prefix ## _defio_read(struct fb_info *info, char __user *buf, \ size_t count, loff_t *ppos) \ { \ return fb_ ## __mode ## _read(info, buf, count, ppos); \ } \ static ssize_t __prefix ## _defio_write(struct fb_info *info, const char __user *buf, \ size_t count, loff_t *ppos) \ { \ unsigned long offset = *ppos; \ ssize_t ret = fb_ ## __mode ## _write(info, buf, count, ppos); \ if (ret > 0) \ __damage_range(info, offset, ret); \ return ret; \ } #define __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, __mode) \ static void __prefix ## _defio_fillrect(struct fb_info *info, \ const struct fb_fillrect *rect) \ { \ __mode ## _fillrect(info, rect); \ __damage_area(info, rect->dx, rect->dy, rect->width, rect->height); \ } \ static void __prefix ## _defio_copyarea(struct fb_info *info, \ const struct fb_copyarea *area) \ { \ __mode ## _copyarea(info, area); \ __damage_area(info, area->dx, area->dy, area->width, area->height); \ } \ static void __prefix ## _defio_imageblit(struct fb_info *info, \ const struct fb_image *image) \ { \ __mode ## _imageblit(info, image); \ __damage_area(info, image->dx, image->dy, image->width, image->height); \ } #define FB_GEN_DEFAULT_DEFERRED_IOMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, io) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, cfb) #define FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) #define FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(__prefix, __damage_range, __damage_area) \ __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) /* * Initializes struct fb_ops for deferred I/O. 
*/ #define __FB_DEFAULT_DEFERRED_OPS_RDWR(__prefix) \ .fb_read = __prefix ## _defio_read, \ .fb_write = __prefix ## _defio_write #define __FB_DEFAULT_DEFERRED_OPS_DRAW(__prefix) \ .fb_fillrect = __prefix ## _defio_fillrect, \ .fb_copyarea = __prefix ## _defio_copyarea, \ .fb_imageblit = __prefix ## _defio_imageblit #define __FB_DEFAULT_DEFERRED_OPS_MMAP(__prefix) \ .fb_mmap = fb_deferred_io_mmap #define FB_DEFAULT_DEFERRED_OPS(__prefix) \ __FB_DEFAULT_DEFERRED_OPS_RDWR(__prefix), \ __FB_DEFAULT_DEFERRED_OPS_DRAW(__prefix), \ __FB_DEFAULT_DEFERRED_OPS_MMAP(__prefix) static inline bool fb_be_math(struct fb_info *info) { #ifdef CONFIG_FB_FOREIGN_ENDIAN #if defined(CONFIG_FB_BOTH_ENDIAN) return info->flags & FBINFO_BE_MATH; #elif defined(CONFIG_FB_BIG_ENDIAN) return true; #elif defined(CONFIG_FB_LITTLE_ENDIAN) return false; #endif /* CONFIG_FB_BOTH_ENDIAN */ #else #ifdef __BIG_ENDIAN return true; #else return false; #endif /* __BIG_ENDIAN */ #endif /* CONFIG_FB_FOREIGN_ENDIAN */ } extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max); #if IS_ENABLED(CONFIG_FB_BACKLIGHT) struct backlight_device *fb_bl_device(struct fb_info *info); void fb_bl_notify_blank(struct fb_info *info, int old_blank); #else static inline struct backlight_device *fb_bl_device(struct fb_info *info) { return NULL; } static inline void fb_bl_notify_blank(struct fb_info *info, int old_blank) { } #endif static inline struct lcd_device *fb_lcd_device(struct fb_info *info) { return info->lcd_dev; } /* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 #define FB_HSYNCTIMINGS 2 #define FB_DCLKTIMINGS 3 #define FB_IGNOREMON 0x100 #define FB_MODE_IS_UNKNOWN 0 #define FB_MODE_IS_DETAILED 1 #define FB_MODE_IS_STANDARD 2 #define FB_MODE_IS_VESA 4 #define FB_MODE_IS_CALCULATED 8 #define FB_MODE_IS_FIRST 16 #define FB_MODE_IS_FROM_VAR 32 extern int fbmon_dpms(const struct fb_info *fb_info); extern int fb_get_mode(int flags, u32 val, struct fb_var_screeninfo *var, struct fb_info *info); extern int fb_validate_mode(const struct fb_var_screeninfo *var, struct fb_info *info); extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var); extern const unsigned char *fb_firmware_edid(struct device *device); extern void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs); extern void fb_destroy_modedb(struct fb_videomode *modedb); extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb); extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter); extern int of_get_fb_videomode(struct device_node *np, struct fb_videomode *fb, int index); extern int fb_videomode_from_videomode(const struct videomode *vm, struct fb_videomode *fbmode); /* modedb.c */ #define VESA_MODEDB_SIZE 43 #define DMT_SIZE 0x50 extern void fb_var_to_videomode(struct fb_videomode *mode, const struct fb_var_screeninfo *var); extern void fb_videomode_to_var(struct fb_var_screeninfo *var, const struct fb_videomode *mode); extern int fb_mode_is_equal(const struct fb_videomode *mode1, const struct fb_videomode *mode2); extern int fb_add_videomode(const struct fb_videomode *mode, struct list_head *head); extern void fb_delete_videomode(const struct fb_videomode *mode, struct list_head *head); extern const struct fb_videomode *fb_match_mode(const struct fb_var_screeninfo *var, struct list_head *head); extern const struct fb_videomode *fb_find_best_mode(const struct 
fb_var_screeninfo *var, struct list_head *head); extern const struct fb_videomode *fb_find_nearest_mode(const struct fb_videomode *mode, struct list_head *head); extern void fb_destroy_modelist(struct list_head *head); extern void fb_videomode_to_modelist(const struct fb_videomode *modedb, int num, struct list_head *head); extern const struct fb_videomode *fb_find_best_display(const struct fb_monspecs *specs, struct list_head *head); /* fbcmap.c */ extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp); extern int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags); extern void fb_dealloc_cmap(struct fb_cmap *cmap); extern int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to); extern int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to); extern int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *fb_info); extern int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *fb_info); extern const struct fb_cmap *fb_default_cmap(int len); extern void fb_invert_cmaps(void); struct fb_videomode { const char *name; /* optional */ u32 refresh; /* optional */ u32 xres; u32 yres; u32 pixclock; u32 left_margin; u32 right_margin; u32 upper_margin; u32 lower_margin; u32 hsync_len; u32 vsync_len; u32 sync; u32 vmode; u32 flag; }; struct dmt_videomode { u32 dmt_id; u32 std_2byte_code; u32 cvt_3byte_code; const struct fb_videomode *mode; }; extern const struct fb_videomode vesa_modes[]; extern const struct dmt_videomode dmt_modes[]; struct fb_modelist { struct list_head list; struct fb_videomode mode; }; extern int fb_find_mode(struct fb_var_screeninfo *var, struct fb_info *info, const char *mode_option, const struct fb_videomode *db, unsigned int dbsize, const struct fb_videomode *default_mode, unsigned int default_bpp); bool fb_modesetting_disabled(const char *drvname); /* * Convenience logging macros */ #define fb_err(fb_info, fmt, ...) \ pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_notice(fb_info, fmt, ...) \ pr_notice("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_warn(fb_info, fmt, ...) \ pr_warn("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_info(fb_info, fmt, ...) \ pr_info("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_dbg(fb_info, fmt, ...) \ pr_debug("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_warn_once(fb_info, fmt, ...) \ pr_warn_once("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_WARN_ONCE(fb_info, condition, fmt, ...) \ WARN_ONCE(condition, "fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) #define fb_WARN_ON_ONCE(fb_info, x) \ fb_WARN_ONCE(fb_info, (x), "%s", "fb_WARN_ON_ONCE(" __stringify(x) ")") #endif /* _LINUX_FB_H */
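/*
 * Illustrative sketch of the deferred-I/O helper macros defined above: a
 * driver with a framebuffer in I/O memory generates damage-tracking fb_ops
 * callbacks from its own damage handlers, then pulls them into its fb_ops
 * with FB_DEFAULT_DEFERRED_OPS(). The foofb names and handler bodies are
 * hypothetical; the handler signatures follow the macro arguments
 * (__damage_range(info, offset, length), __damage_area(info, x, y, w, h)):
 *
 *	static void foofb_damage_range(struct fb_info *info,
 *				       off_t off, size_t len)
 *	{
 *		// mark the byte range dirty, schedule a flush
 *	}
 *
 *	static void foofb_damage_area(struct fb_info *info, u32 x, u32 y,
 *				      u32 width, u32 height)
 *	{
 *		// mark the rectangle dirty, schedule a flush
 *	}
 *
 *	FB_GEN_DEFAULT_DEFERRED_IOMEM_OPS(foofb,
 *					  foofb_damage_range,
 *					  foofb_damage_area)
 *
 *	static const struct fb_ops foofb_ops = {
 *		.owner = THIS_MODULE,
 *		FB_DEFAULT_DEFERRED_OPS(foofb),
 *	};
 */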
// SPDX-License-Identifier: GPL-2.0-or-later /** -*- linux-c -*- *********************************************************** * Linux PPP over Ethernet (PPPoX/PPPoE) Sockets * * PPPoX --- Generic PPP encapsulation socket family * PPPoE --- PPP over Ethernet (RFC 2516) * * Version: 0.7.0 * * 070228 : Fix to allow multiple sessions with same remote MAC and same * session id by including the local device ifindex in the * tuple identifying a session. This also ensures packets can't * be injected into a session from interfaces other than the one * specified by userspace. Florian Zumbiehl <florz@florz.de> * (Oh, BTW, this one is YYMMDD, in case you were wondering ...) * 220102 : Fix module use count on failure in pppoe_create, pppox_sk -acme * 030700 : Fixed connect logic to allow for disconnect. * 270700 : Fixed potential SMP problems; we must protect against * simultaneous invocation of ppp_input * and ppp_unregister_channel. * 040800 : Respect reference count mechanisms on net-devices. * 200800 : fix kfree(skb) in pppoe_rcv (acme) * Module reference count is decremented in the right spot now, * guards against sock_put not actually freeing the sk * in pppoe_release. * 051000 : Initialization cleanup. * 111100 : Fix recvmsg. * 050101 : Fix PADT processing. * 140501 : Use pppoe_rcv_core to handle all backlog. (Alexey) * 170701 : Do not lock_sock with rwlock held. (DaveM) * Ignore discovery frames if user has socket * locked. (DaveM) * Ignore return value of dev_queue_xmit in __pppoe_xmit * or else we may kfree an SKB twice. (DaveM) * 190701 : When doing copies of skb's in __pppoe_xmit, always delete * the original skb that was passed in on success, never on * failure. Delete the copy of the skb on failure to avoid * a memory leak. * 081001 : Misc. cleanup (licence string, non-blocking, prevent * reference of device on close). * 121301 : New ppp channels interface; cannot unregister a channel * from interrupts. Thus, we mark the socket as a ZOMBIE * and do the unregistration later.
* 081002 : seq_file support for proc stuff -acme * 111602 : Merge all 2.4 fixes into 2.5/2.6 tree. Label 2.5/2.6 * as version 0.7. Spacing cleanup. * Author: Michal Ostrowski <mostrows@speakeasy.net> * Contributors: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * David S. Miller (davem@redhat.com) * * License: */ #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/if_pppox.h> #include <linux/ppp_channel.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/notifier.h> #include <linux/file.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <linux/uaccess.h> #define PPPOE_HASH_BITS CONFIG_PPPOE_HASH_BITS #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS) #define PPPOE_HASH_MASK (PPPOE_HASH_SIZE - 1) static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); static const struct proto_ops pppoe_ops; static const struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ static unsigned int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all * nets by injecting net id into the hash but * it would increase hash chains and add * a few additional math comparisons messy * as well, moreover in case of SMP less locking * controversy here */ struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; rwlock_t hash_lock; }; /* * PPPoE could be in the following stages: * 1) Discovery stage (to obtain remote MAC and Session ID) * 2) Session stage (MAC and SID are known) * * Ethernet frames have a special tag for this but * we use simpler approach based on session id */ static inline bool stage_session(__be16 sid) { return sid != 0; } static inline struct pppoe_net *pppoe_pernet(struct net *net) { return net_generic(net, pppoe_net_id); } static inline int cmp_2_addr(struct pppoe_addr *a, struct pppoe_addr *b) { return a->sid == b->sid && ether_addr_equal(a->remote, b->remote); } static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, char *addr) { return a->sid == sid && ether_addr_equal(a->remote, addr); } #if 8 % PPPOE_HASH_BITS #error 8 must be a multiple of PPPOE_HASH_BITS #endif static int hash_item(__be16 sid, unsigned char *addr) { unsigned char hash = 0; unsigned int i; for (i = 0; i < ETH_ALEN; i++) hash ^= addr[i]; for (i = 0; i < sizeof(sid_t) * 8; i += 8) hash ^= (__force __u32)sid >> i; for (i = 8; (i >>= 1) >= PPPOE_HASH_BITS;) hash ^= hash >> i; return hash & PPPOE_HASH_MASK; } /********************************************************************** * * Set/get/delete/rehash items (internal versions) * **********************************************************************/ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret; ret = pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; ret = ret->next; } return NULL; } static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); struct pppox_sock *ret; ret = pn->hash_table[hash]; while (ret) { if 
(cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; ret = ret->next; } po->next = pn->hash_table[hash]; pn->hash_table[hash] = po; return 0; } static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret, **src; ret = pn->hash_table[hash]; src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { *src = ret->next; break; } src = &ret->next; ret = ret->next; } } /********************************************************************** * * Set/get/delete/rehash items * **********************************************************************/ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { struct pppox_sock *po; read_lock_bh(&pn->hash_lock); po = __get_item(pn, sid, addr, ifindex); if (po) sock_hold(sk_pppox(po)); read_unlock_bh(&pn->hash_lock); return po; } static inline struct pppox_sock *get_item_by_addr(struct net *net, struct sockaddr_pppox *sp) { struct net_device *dev; struct pppoe_net *pn; struct pppox_sock *pppox_sock = NULL; int ifindex; rcu_read_lock(); dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); if (dev) { ifindex = dev->ifindex; pn = pppoe_pernet(net); pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } rcu_read_unlock(); return pppox_sock; } static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { write_lock_bh(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); write_unlock_bh(&pn->hash_lock); } /*************************************************************************** * * Handler for device events. * Certain device events require that sockets be unconnected. * **************************************************************************/ static void pppoe_flush_dev(struct net_device *dev) { struct pppoe_net *pn; int i; pn = pppoe_pernet(dev_net(dev)); write_lock_bh(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { struct pppox_sock *po = pn->hash_table[i]; struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { po = po->next; } if (!po) break; sk = sk_pppox(po); /* We always grab the socket lock, followed by the * hash_lock, in that order. Since we should hold the * sock lock while doing any unbinding, we need to * release the lock we're holding. Hold a reference to * the sock so it doesn't disappear as we're jumping * between locks. */ sock_hold(sk); write_unlock_bh(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); } release_sock(sk); sock_put(sk); /* Restart the process from the start of the current * hash chain. We dropped locks so the world may have * change from underneath us. */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); write_lock_bh(&pn->hash_lock); po = pn->hash_table[i]; } } write_unlock_bh(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEADDR: case NETDEV_CHANGEMTU: /* A change in mtu or address is a bad thing, requiring * LCP re-negotiation. */ case NETDEV_GOING_DOWN: case NETDEV_DOWN: /* Find every socket on this device and kill it. 
*/ pppoe_flush_dev(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block pppoe_notifier = { .notifier_call = pppoe_device_event, }; /************************************************************************ * * Do the real work of receiving a PPPoE Session frame. * ***********************************************************************/ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *relay_po; /* Backlog receive. Semantics of backlog rcv preclude any code from * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state * can't change. */ if (skb->pkt_type == PACKET_OTHERHOST) goto abort_kfree; if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) goto abort_put; if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_put; sock_put(sk_pppox(relay_po)); } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; } return NET_RX_SUCCESS; abort_put: sock_put(sk_pppox(relay_po)); abort_kfree: kfree_skb(skb); return NET_RX_DROP; } /************************************************************************ * * Receive wrapper called in BH context. * ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; int len; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb_mac_header_len(skb) < ETH_HLEN) goto drop; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; ph = pppoe_hdr(skb); len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(*ph)); if (skb->len < len) goto drop; if (pskb_trim_rcsum(skb, len)) goto drop; ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) * is known to be safe. */ po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; return sk_receive_skb(sk_pppox(po), skb, 0); drop: kfree_skb(skb); out: return NET_RX_DROP; } static void pppoe_unbind_sock_work(struct work_struct *work) { struct pppox_sock *po = container_of(work, struct pppox_sock, proto.pppoe.padt_work); struct sock *sk = sk_pppox(po); lock_sock(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); release_sock(sk); sock_put(sk); } /************************************************************************ * * Receive a PPPoE Discovery frame. * This is solely for detection of PADT frames * ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb->pkt_type != PACKET_HOST) goto abort; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; pn = pppoe_pernet(dev_net(dev)); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) sock_put(sk_pppox(po)); abort: kfree_skb(skb); out: return NET_RX_SUCCESS; /* Lies... 
:-) */ } static struct packet_type pppoes_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_SES), .func = pppoe_rcv, }; static struct packet_type pppoed_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_DISC), .func = pppoe_disc_rcv, }; static struct proto pppoe_sk_proto __read_mostly = { .name = "PPPOE", .owner = THIS_MODULE, .obj_size = sizeof(struct pppox_sock), }; /*********************************************************************** * * Initialize a new struct sock. * **********************************************************************/ static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, pppoe_unbind_sock_work); return 0; } static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; struct pppoe_net *pn; struct net *net = NULL; if (!sk) return 0; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { release_sock(sk); return -EBADF; } po = pppox_sk(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; net = sock_net(sk); pn = pppoe_pernet(net); /* * protect "po" from concurrent updates * on pppoe_flush_dev */ delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); return 0; } static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = NULL; struct pppoe_net *pn; struct net *net = NULL; int error; lock_sock(sk); error = -EINVAL; if (sockaddr_len != sizeof(struct sockaddr_pppox)) goto end; if (sp->sa_protocol != PX_PROTO_OE) goto end; /* Check for already bound sockets */ error = -EBUSY; if ((sk->sk_state & PPPOX_CONNECTED) && stage_session(sp->sa_addr.pppoe.sid)) goto end; /* Check for already disconnected sockets, on attempts to disconnect */ error = -EALREADY; if ((sk->sk_state & PPPOX_DEAD) && !stage_session(sp->sa_addr.pppoe.sid)) goto end; error = 0; /* Delete the old binding */ if (stage_session(po->pppoe_pa.sid)) { pppox_unbind_sock(sk); pn = pppoe_pernet(sock_net(sk)); delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } po->pppoe_ifindex = 0; memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); memset(&po->chan, 0, sizeof(po->chan)); po->next = NULL; po->num = 0; sk->sk_state = PPPOX_NONE; } /* Re-bind in session stage only */ if (stage_session(sp->sa_addr.pppoe.sid)) { error = -ENODEV; net = sock_net(sk); dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); if (!dev) goto err_put; po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; pn = pppoe_pernet(net); if (!(dev->flags & IFF_UP)) { goto err_put; } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); write_lock_bh(&pn->hash_lock); error = __set_item(pn, po); write_unlock_bh(&pn->hash_lock); if (error < 0) 
goto err_put; po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; po->chan.direct_xmit = true; error = ppp_register_net_channel(dev_net(dev), &po->chan); if (error) { delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); goto err_put; } sk->sk_state = PPPOX_CONNECTED; } po->num = sp->sa_addr.pppoe.sid; end: release_sock(sk); return error; err_put: if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } goto end; } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_OE; memcpy(&sp.sa_addr.pppoe, &pppox_sk(sock->sk)->pppoe_pa, sizeof(struct pppoe_addr)); memcpy(uaddr, &sp, len); return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int val; int err; switch (cmd) { case PPPIOCGMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (put_user(po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN, (int __user *)arg)) break; err = 0; break; case PPPIOCSMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (get_user(val, (int __user *)arg)) break; if (val < (po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN)) err = 0; else err = -EINVAL; break; case PPPIOCSFLAGS: err = -EFAULT; if (get_user(val, (int __user *)arg)) break; err = 0; break; case PPPOEIOCSFWD: { struct pppox_sock *relay_po; err = -EBUSY; if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; if (!(sk->sk_state & PPPOX_CONNECTED)) break; /* PPPoE address from the user specifies an outbound PPPoE address which frames are forwarded to */ err = -EFAULT; if (copy_from_user(&po->pppoe_relay, (void __user *)arg, sizeof(struct sockaddr_pppox))) break; err = -EINVAL; if (po->pppoe_relay.sa_family != AF_PPPOX || po->pppoe_relay.sa_protocol != PX_PROTO_OE) break; /* Check that the socket referenced by the address actually exists. */ relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (!relay_po) break; sock_put(sk_pppox(relay_po)); sk->sk_state |= PPPOX_RELAY; err = 0; break; } case PPPOEIOCDFWD: err = -EALREADY; if (!(sk->sk_state & PPPOX_RELAY)) break; sk->sk_state &= ~PPPOX_RELAY; err = 0; break; default: err = -ENOTTY; } return err; } static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; char *start; int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { error = -ENOTCONN; goto end; } hdr.ver = 1; hdr.type = 1; hdr.code = 0; hdr.sid = po->num; dev = po->pppoe_dev; error = -EMSGSIZE; if (total_len > (dev->mtu + dev->hard_header_len)) goto end; hlen = LL_RESERVED_SPACE(dev); skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. 
*/ skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); start = (char *)&ph->tag[0]; error = memcpy_from_msg(start, m, total_len); if (error < 0) { kfree_skb(skb); goto end; } error = total_len; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, total_len); memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); ph->length = htons(total_len); dev_queue_xmit(skb); end: release_sock(sk); return error; } /************************************************************************ * * xmit function for internal use. * ***********************************************************************/ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; struct pppoe_hdr *ph; int data_len = skb->len; /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP * xmit operations conclude prior to an unregistration call. Thus * sk->sk_state cannot change, so we don't need to do lock_sock(). * But, we also can't do a lock_sock since that introduces a potential * deadlock as we'd reverse the lock ordering used when calling * ppp_unregister_channel(). */ if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; if (!dev) goto abort; /* Copy the data if there is no space for the header or if it's * read-only. */ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); skb_reset_network_header(skb); ph = pppoe_hdr(skb); ph->ver = 1; ph->type = 1; ph->code = 0; ph->sid = po->num; ph->length = htons(data_len); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); skb->dev = dev; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); dev_queue_xmit(skb); return 1; abort: kfree_skb(skb); return 1; } /************************************************************************ * * xmit function called by generic PPP driver * sends PPP frame over PPPoE socket * ***********************************************************************/ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = chan->private; return __pppoe_xmit(sk, skb); } static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path, const struct ppp_channel *chan) { struct sock *sk = chan->private; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED) || !dev) return -1; path->type = DEV_PATH_PPPOE; path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; return 0; } static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, .fill_forward_path = pppoe_fill_forward_path, }; static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; if (sk->sk_state & PPPOX_BOUND) return -EIO; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_msg(skb, 0, m, total_len); if (error == 0) { consume_skb(skb); return total_len; } kfree_skb(skb); return error; } #ifdef CONFIG_PROC_FS static int 
pppoe_seq_show(struct seq_file *seq, void *v) { struct pppox_sock *po; char *dev_name; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Id Address Device\n"); goto out; } po = v; dev_name = po->pppoe_pa.dev; seq_printf(seq, "%08X %pM %8s\n", po->pppoe_pa.sid, po->pppoe_pa.remote, dev_name); out: return 0; } static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) { struct pppox_sock *po; int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { po = pn->hash_table[i]; while (po) { if (!pos--) goto out; po = po->next; } } out: return po; } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) __acquires(pn->hash_lock) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; read_lock_bh(&pn->hash_lock); return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); struct pppox_sock *po; ++*pos; if (v == SEQ_START_TOKEN) { po = pppoe_get_idx(pn, 0); goto out; } po = v; if (po->next) po = po->next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = pn->hash_table[hash]; if (po) break; } } out: return po; } static void pppoe_seq_stop(struct seq_file *seq, void *v) __releases(pn->hash_lock) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); read_unlock_bh(&pn->hash_lock); } static const struct seq_operations pppoe_seq_ops = { .start = pppoe_seq_start, .next = pppoe_seq_next, .stop = pppoe_seq_stop, .show = pppoe_seq_show, }; #endif /* CONFIG_PROC_FS */ static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, .bind = sock_no_bind, .connect = pppoe_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = pppox_compat_ioctl, #endif }; static const struct pppox_proto pppoe_proto = { .create = pppoe_create, .ioctl = pppoe_ioctl, .owner = THIS_MODULE, }; static __net_init int pppoe_init_net(struct net *net) { struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; rwlock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); #ifdef CONFIG_PROC_FS if (!pde) return -ENOMEM; #endif return 0; } static __net_exit void pppoe_exit_net(struct net *net) { remove_proc_entry("pppoe", net->proc_net); } static struct pernet_operations pppoe_net_ops = { .init = pppoe_init_net, .exit = pppoe_exit_net, .id = &pppoe_net_id, .size = sizeof(struct pppoe_net), }; static int __init pppoe_init(void) { int err; err = register_pernet_device(&pppoe_net_ops); if (err) goto out; err = proto_register(&pppoe_sk_proto, 0); if (err) goto out_unregister_net_ops; err = register_pppox_proto(PX_PROTO_OE, &pppoe_proto); if (err) goto out_unregister_pppoe_proto; dev_add_pack(&pppoes_ptype); dev_add_pack(&pppoed_ptype); register_netdevice_notifier(&pppoe_notifier); return 0; out_unregister_pppoe_proto: proto_unregister(&pppoe_sk_proto); out_unregister_net_ops: unregister_pernet_device(&pppoe_net_ops); out: return err; } static void __exit pppoe_exit(void) { unregister_netdevice_notifier(&pppoe_notifier); dev_remove_pack(&pppoed_ptype); dev_remove_pack(&pppoes_ptype); unregister_pppox_proto(PX_PROTO_OE); 
proto_unregister(&pppoe_sk_proto); unregister_pernet_device(&pppoe_net_ops); } module_init(pppoe_init); module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE);
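/*
 * Editor's note: the hash_item() fold above combines the six MAC octets
 * and the two session-id octets by XOR, then folds the resulting byte
 * down to PPPOE_HASH_BITS bits. A minimal, self-contained userspace
 * sketch follows, assuming PPPOE_HASH_BITS == 4 (the usual
 * CONFIG_PPPOE_HASH_BITS default); hash_item_demo() and the DEMO_*
 * macros are hypothetical names, not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_HASH_BITS 4
#define DEMO_HASH_SIZE (1 << DEMO_HASH_BITS)
#define DEMO_HASH_MASK (DEMO_HASH_SIZE - 1)

static int hash_item_demo(uint16_t sid, const unsigned char *addr)
{
	unsigned char hash = 0;
	unsigned int i;

	for (i = 0; i < 6; i++)		/* XOR the ETH_ALEN MAC octets */
		hash ^= addr[i];
	for (i = 0; i < 16; i += 8)	/* XOR both bytes of the 16-bit sid */
		hash ^= sid >> i;
	for (i = 8; (i >>= 1) >= DEMO_HASH_BITS;)
		hash ^= hash >> i;	/* fold 8 bits down to DEMO_HASH_BITS */

	return hash & DEMO_HASH_MASK;
}

int main(void)
{
	const unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	/* sid 0x1234 with this MAC lands in bucket 4 of 16 */
	printf("bucket = %d\n", hash_item_demo(0x1234, mac));
	return 0;
}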
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved. */ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H #include <linux/fs.h> int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno, struct inode *inode, unsigned int from, unsigned int to, int new); void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios); int walk_page_buffers( handle_t *handle, struct buffer_head *head, unsigned from, unsigned to, int *partial, int (*fn)( handle_t *handle, struct buffer_head *bh)); int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, void *fsdata); typedef enum { OCFS2_WRITE_BUFFER = 0, OCFS2_WRITE_DIRECT, OCFS2_WRITE_MMAP, } ocfs2_write_type_t; int ocfs2_write_begin_nolock(struct address_space *mapping, loff_t pos, unsigned len, ocfs2_write_type_t type, struct folio **foliop, void **fsdata, struct buffer_head *di_bh, struct folio *mmap_folio); int ocfs2_read_inline_data(struct inode *inode, struct folio *folio, struct buffer_head *di_bh); int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); int ocfs2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); /* all ocfs2_dio_end_io()'s fault */ #define ocfs2_iocb_is_rw_locked(iocb) \ test_bit(0, (unsigned long *)&iocb->private) static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) { set_bit(0, (unsigned long *)&iocb->private); if (level) set_bit(1, (unsigned long *)&iocb->private); else clear_bit(1, (unsigned long *)&iocb->private); } /* * A named enum representing the lock types as the bit numbers stored in * iocb->private, used for communication between ocfs2_dio_end_io() and * ocfs2_file_write/read_iter(). */ enum ocfs2_iocb_lock_bits { OCFS2_IOCB_RW_LOCK = 0, OCFS2_IOCB_RW_LOCK_LEVEL, OCFS2_IOCB_NUM_LOCKS }; #define ocfs2_iocb_init_rw_locked(iocb) \ (iocb->private = NULL) #define ocfs2_iocb_clear_rw_locked(iocb) \ clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private) #define ocfs2_iocb_rw_locked_level(iocb) \ test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private) #endif /* OCFS2_AOPS_H */
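/*
 * Editor's note: a minimal userspace sketch of the iocb->private bit
 * protocol above: OCFS2_IOCB_RW_LOCK records that the rw lock is held,
 * OCFS2_IOCB_RW_LOCK_LEVEL records the level it was taken at. The
 * kernel uses atomic set_bit()/clear_bit() on the pointer-sized field;
 * this sketch uses plain bit ops on an unsigned long, and the demo_*
 * names are illustrative, not part of ocfs2.
 */
#include <stdio.h>

enum { DEMO_RW_LOCK = 0, DEMO_RW_LOCK_LEVEL = 1 };

static void demo_set_rw_locked(unsigned long *priv, int level)
{
	*priv |= 1UL << DEMO_RW_LOCK;		/* mark the rw lock held */
	if (level)
		*priv |= 1UL << DEMO_RW_LOCK_LEVEL;
	else
		*priv &= ~(1UL << DEMO_RW_LOCK_LEVEL);
}

int main(void)
{
	unsigned long priv = 0;	/* ocfs2_iocb_init_rw_locked() analogue */

	demo_set_rw_locked(&priv, 1);
	printf("locked=%lu level=%lu\n",
	       (priv >> DEMO_RW_LOCK) & 1, (priv >> DEMO_RW_LOCK_LEVEL) & 1);
	return 0;
}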
// SPDX-License-Identifier: GPL-2.0-or-later /* * (c) 1999 Andreas Gal <gal@cs.uni-magdeburg.de> * (c) 2000-2001 Vojtech Pavlik <vojtech@ucw.cz> * (c) 2007-2009 Jiri Kosina * * HID debugging support */ /* * * Should you need to contact me, the author, you can do so either by * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail: * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/kfifo.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/hid.h> #include <linux/hid-debug.h> static struct dentry *hid_debug_root; struct hid_usage_entry { unsigned page; unsigned usage; const char *description; }; static const struct hid_usage_entry hid_usage_table[] = { { 0x00, 0, "Undefined" }, { 0x01, 0, "GenericDesktop" }, { 0x01, 0x0001, "Pointer" }, { 0x01, 0x0002, "Mouse" }, { 0x01, 0x0004, "Joystick" }, { 0x01, 0x0005, "Gamepad" }, { 0x01, 0x0006, "Keyboard" }, { 0x01, 0x0007, "Keypad" }, { 0x01, 0x0008, "MultiaxisController" }, { 0x01, 0x0009, "TabletPCSystemControls" }, { 0x01, 0x000a, "WaterCoolingDevice" }, { 0x01, 0x000b, "ComputerChassisDevice" }, { 0x01, 0x000c, "WirelessRadioControls" }, { 0x01, 0x000d, "PortableDeviceControl" }, { 0x01, 0x000e, "SystemMultiAxisController" }, { 0x01, 0x000f, "SpatialController" }, { 0x01, 0x0010, "AssistiveControl" }, { 0x01, 0x0011, "DeviceDock" }, { 0x01, 0x0012, "DockableDevice" }, { 0x01, 0x0013, "CallStateManagementControl" }, { 0x01, 0x0030, "X" }, { 0x01, 0x0031, "Y" }, { 0x01, 0x0032, "Z" }, { 0x01, 0x0033, "Rx" }, { 0x01, 0x0034, "Ry" }, { 0x01, 0x0035, "Rz" }, { 0x01, 0x0036, "Slider" }, { 0x01, 0x0037, "Dial" }, { 0x01, 0x0038, "Wheel" }, { 0x01, 0x0039, "HatSwitch" }, { 0x01, 0x003a, "CountedBuffer" }, { 0x01, 0x003b, "ByteCount" }, { 0x01, 0x003c, "MotionWakeup" }, { 0x01, 0x003d, "Start" }, { 0x01, 0x003e, "Select" }, { 0x01, 0x0040, "Vx" }, { 0x01, 0x0041, "Vy" }, { 0x01, 0x0042, "Vz" }, { 0x01, 0x0043, "Vbrx" }, { 0x01, 0x0044, "Vbry" }, { 0x01, 0x0045, "Vbrz" }, { 0x01, 0x0046, "Vno" }, { 0x01, 0x0047, "FeatureNotification" }, { 0x01, 0x0048, "ResolutionMultiplier" }, { 0x01, 0x0049, "Qx" }, { 0x01, 0x004a, "Qy" }, { 0x01, 0x004b, "Qz" }, { 0x01, 0x004c, "Qw" }, { 0x01, 0x0080, "SystemControl" }, { 0x01, 0x0081, "SystemPowerDown" }, { 0x01, 0x0082, "SystemSleep" }, { 0x01, 0x0083, "SystemWakeUp" }, { 0x01, 0x0084, "SystemContextMenu" }, { 0x01, 0x0085, "SystemMainMenu" }, { 0x01, 0x0086, "SystemAppMenu" }, { 0x01, 0x0087, "SystemMenuHelp" }, { 0x01, 0x0088, "SystemMenuExit" }, { 0x01, 0x0089, "SystemMenuSelect" }, { 0x01, 0x008a, "SystemMenuRight" }, { 0x01, 0x008b, "SystemMenuLeft" }, { 0x01, 0x008c, "SystemMenuUp" }, { 0x01, 0x008d, "SystemMenuDown" }, { 0x01, 0x008e, "SystemColdRestart" }, { 0x01, 0x008f, "SystemWarmRestart" }, { 0x01, 0x0090, "DpadUp" }, { 0x01, 0x0091, "DpadDown" }, { 0x01, 0x0092, "DpadRight" }, { 0x01, 0x0093, "DpadLeft" }, { 0x01, 0x0094, "IndexTrigger" }, { 0x01, 0x0095, "PalmTrigger" }, { 0x01, 0x0096, "Thumbstick" }, { 0x01, 0x0097, "SystemFunctionShift" }, { 0x01, 0x0098, "SystemFunctionShiftLock" }, {
0x01, 0x0099, "SystemFunctionShiftLockIndicator" }, { 0x01, 0x009a, "SystemDismissNotification" }, { 0x01, 0x009b, "SystemDoNotDisturb" }, { 0x01, 0x00a0, "SystemDock" }, { 0x01, 0x00a1, "SystemUndock" }, { 0x01, 0x00a2, "SystemSetup" }, { 0x01, 0x00a3, "SystemBreak" }, { 0x01, 0x00a4, "SystemDebuggerBreak" }, { 0x01, 0x00a5, "ApplicationBreak" }, { 0x01, 0x00a6, "ApplicationDebuggerBreak" }, { 0x01, 0x00a7, "SystemSpeakerMute" }, { 0x01, 0x00a8, "SystemHibernate" }, { 0x01, 0x00a9, "SystemMicrophoneMute" }, { 0x01, 0x00b0, "SystemDisplayInvert" }, { 0x01, 0x00b1, "SystemDisplayInternal" }, { 0x01, 0x00b2, "SystemDisplayExternal" }, { 0x01, 0x00b3, "SystemDisplayBoth" }, { 0x01, 0x00b4, "SystemDisplayDual" }, { 0x01, 0x00b5, "SystemDisplayToggleIntExtMode" }, { 0x01, 0x00b6, "SystemDisplaySwapPrimarySecondary" }, { 0x01, 0x00b7, "SystemDisplayToggleLCDAutoscale" }, { 0x01, 0x00c0, "SensorZone" }, { 0x01, 0x00c1, "RPM" }, { 0x01, 0x00c2, "CoolantLevel" }, { 0x01, 0x00c3, "CoolantCriticalLevel" }, { 0x01, 0x00c4, "CoolantPump" }, { 0x01, 0x00c5, "ChassisEnclosure" }, { 0x01, 0x00c6, "WirelessRadioButton" }, { 0x01, 0x00c7, "WirelessRadioLED" }, { 0x01, 0x00c8, "WirelessRadioSliderSwitch" }, { 0x01, 0x00c9, "SystemDisplayRotationLockButton" }, { 0x01, 0x00ca, "SystemDisplayRotationLockSliderSwitch" }, { 0x01, 0x00cb, "ControlEnable" }, { 0x01, 0x00d0, "DockableDeviceUniqueID" }, { 0x01, 0x00d1, "DockableDeviceVendorID" }, { 0x01, 0x00d2, "DockableDevicePrimaryUsagePage" }, { 0x01, 0x00d3, "DockableDevicePrimaryUsageID" }, { 0x01, 0x00d4, "DockableDeviceDockingState" }, { 0x01, 0x00d5, "DockableDeviceDisplayOcclusion" }, { 0x01, 0x00d6, "DockableDeviceObjectType" }, { 0x01, 0x00e0, "CallActiveLED" }, { 0x01, 0x00e1, "CallMuteToggle" }, { 0x01, 0x00e2, "CallMuteLED" }, { 0x02, 0, "SimulationControls" }, { 0x02, 0x0001, "FlightSimulationDevice" }, { 0x02, 0x0002, "AutomobileSimulationDevice" }, { 0x02, 0x0003, "TankSimulationDevice" }, { 0x02, 0x0004, "SpaceshipSimulationDevice" }, { 0x02, 0x0005, "SubmarineSimulationDevice" }, { 0x02, 0x0006, "SailingSimulationDevice" }, { 0x02, 0x0007, "MotorcycleSimulationDevice" }, { 0x02, 0x0008, "SportsSimulationDevice" }, { 0x02, 0x0009, "AirplaneSimulationDevice" }, { 0x02, 0x000a, "HelicopterSimulationDevice" }, { 0x02, 0x000b, "MagicCarpetSimulationDevice" }, { 0x02, 0x000c, "BicycleSimulationDevice" }, { 0x02, 0x0020, "FlightControlStick" }, { 0x02, 0x0021, "FlightStick" }, { 0x02, 0x0022, "CyclicControl" }, { 0x02, 0x0023, "CyclicTrim" }, { 0x02, 0x0024, "FlightYoke" }, { 0x02, 0x0025, "TrackControl" }, { 0x02, 0x00b0, "Aileron" }, { 0x02, 0x00b1, "AileronTrim" }, { 0x02, 0x00b2, "AntiTorqueControl" }, { 0x02, 0x00b3, "AutopilotEnable" }, { 0x02, 0x00b4, "ChaffRelease" }, { 0x02, 0x00b5, "CollectiveControl" }, { 0x02, 0x00b6, "DiveBrake" }, { 0x02, 0x00b7, "ElectronicCountermeasures" }, { 0x02, 0x00b8, "Elevator" }, { 0x02, 0x00b9, "ElevatorTrim" }, { 0x02, 0x00ba, "Rudder" }, { 0x02, 0x00bb, "Throttle" }, { 0x02, 0x00bc, "FlightCommunications" }, { 0x02, 0x00bd, "FlareRelease" }, { 0x02, 0x00be, "LandingGear" }, { 0x02, 0x00bf, "ToeBrake" }, { 0x02, 0x00c0, "Trigger" }, { 0x02, 0x00c1, "WeaponsArm" }, { 0x02, 0x00c2, "WeaponsSelect" }, { 0x02, 0x00c3, "WingFlaps" }, { 0x02, 0x00c4, "Accelerator" }, { 0x02, 0x00c5, "Brake" }, { 0x02, 0x00c6, "Clutch" }, { 0x02, 0x00c7, "Shifter" }, { 0x02, 0x00c8, "Steering" }, { 0x02, 0x00c9, "TurretDirection" }, { 0x02, 0x00ca, "BarrelElevation" }, { 0x02, 0x00cb, "DivePlane" }, { 0x02, 0x00cc, "Ballast" }, { 0x02, 
0x00cd, "BicycleCrank" }, { 0x02, 0x00ce, "HandleBars" }, { 0x02, 0x00cf, "FrontBrake" }, { 0x02, 0x00d0, "RearBrake" }, { 0x03, 0, "VRControls" }, { 0x03, 0x0001, "Belt" }, { 0x03, 0x0002, "BodySuit" }, { 0x03, 0x0003, "Flexor" }, { 0x03, 0x0004, "Glove" }, { 0x03, 0x0005, "HeadTracker" }, { 0x03, 0x0006, "HeadMountedDisplay" }, { 0x03, 0x0007, "HandTracker" }, { 0x03, 0x0008, "Oculometer" }, { 0x03, 0x0009, "Vest" }, { 0x03, 0x000a, "AnimatronicDevice" }, { 0x03, 0x0020, "StereoEnable" }, { 0x03, 0x0021, "DisplayEnable" }, { 0x04, 0, "SportControls" }, { 0x04, 0x0001, "BaseballBat" }, { 0x04, 0x0002, "GolfClub" }, { 0x04, 0x0003, "RowingMachine" }, { 0x04, 0x0004, "Treadmill" }, { 0x04, 0x0030, "Oar" }, { 0x04, 0x0031, "Slope" }, { 0x04, 0x0032, "Rate" }, { 0x04, 0x0033, "StickSpeed" }, { 0x04, 0x0034, "StickFaceAngle" }, { 0x04, 0x0035, "StickHeelToe" }, { 0x04, 0x0036, "StickFollowThrough" }, { 0x04, 0x0037, "StickTempo" }, { 0x04, 0x0038, "StickType" }, { 0x04, 0x0039, "StickHeight" }, { 0x04, 0x0050, "Putter" }, { 0x04, 0x0051, "1Iron" }, { 0x04, 0x0052, "2Iron" }, { 0x04, 0x0053, "3Iron" }, { 0x04, 0x0054, "4Iron" }, { 0x04, 0x0055, "5Iron" }, { 0x04, 0x0056, "6Iron" }, { 0x04, 0x0057, "7Iron" }, { 0x04, 0x0058, "8Iron" }, { 0x04, 0x0059, "9Iron" }, { 0x04, 0x005a, "10Iron" }, { 0x04, 0x005b, "11Iron" }, { 0x04, 0x005c, "SandWedge" }, { 0x04, 0x005d, "LoftWedge" }, { 0x04, 0x005e, "PowerWedge" }, { 0x04, 0x005f, "1Wood" }, { 0x04, 0x0060, "3Wood" }, { 0x04, 0x0061, "5Wood" }, { 0x04, 0x0062, "7Wood" }, { 0x04, 0x0063, "9Wood" }, { 0x05, 0, "GameControls" }, { 0x05, 0x0001, "3DGameController" }, { 0x05, 0x0002, "PinballDevice" }, { 0x05, 0x0003, "GunDevice" }, { 0x05, 0x0020, "PointofView" }, { 0x05, 0x0021, "TurnRightLeft" }, { 0x05, 0x0022, "PitchForwardBackward" }, { 0x05, 0x0023, "RollRightLeft" }, { 0x05, 0x0024, "MoveRightLeft" }, { 0x05, 0x0025, "MoveForwardBackward" }, { 0x05, 0x0026, "MoveUpDown" }, { 0x05, 0x0027, "LeanRightLeft" }, { 0x05, 0x0028, "LeanForwardBackward" }, { 0x05, 0x0029, "HeightofPOV" }, { 0x05, 0x002a, "Flipper" }, { 0x05, 0x002b, "SecondaryFlipper" }, { 0x05, 0x002c, "Bump" }, { 0x05, 0x002d, "NewGame" }, { 0x05, 0x002e, "ShootBall" }, { 0x05, 0x002f, "Player" }, { 0x05, 0x0030, "GunBolt" }, { 0x05, 0x0031, "GunClip" }, { 0x05, 0x0032, "GunSelector" }, { 0x05, 0x0033, "GunSingleShot" }, { 0x05, 0x0034, "GunBurst" }, { 0x05, 0x0035, "GunAutomatic" }, { 0x05, 0x0036, "GunSafety" }, { 0x05, 0x0037, "GamepadFireJump" }, { 0x05, 0x0039, "GamepadTrigger" }, { 0x05, 0x003a, "FormfittingGamepad" }, { 0x06, 0, "GenericDeviceControls" }, { 0x06, 0x0001, "BackgroundNonuserControls" }, { 0x06, 0x0020, "BatteryStrength" }, { 0x06, 0x0021, "WirelessChannel" }, { 0x06, 0x0022, "WirelessID" }, { 0x06, 0x0023, "DiscoverWirelessControl" }, { 0x06, 0x0024, "SecurityCodeCharacterEntered" }, { 0x06, 0x0025, "SecurityCodeCharacterErased" }, { 0x06, 0x0026, "SecurityCodeCleared" }, { 0x06, 0x0027, "SequenceID" }, { 0x06, 0x0028, "SequenceIDReset" }, { 0x06, 0x0029, "RFSignalStrength" }, { 0x06, 0x002a, "SoftwareVersion" }, { 0x06, 0x002b, "ProtocolVersion" }, { 0x06, 0x002c, "HardwareVersion" }, { 0x06, 0x002d, "Major" }, { 0x06, 0x002e, "Minor" }, { 0x06, 0x002f, "Revision" }, { 0x06, 0x0030, "Handedness" }, { 0x06, 0x0031, "EitherHand" }, { 0x06, 0x0032, "LeftHand" }, { 0x06, 0x0033, "RightHand" }, { 0x06, 0x0034, "BothHands" }, { 0x06, 0x0040, "GripPoseOffset" }, { 0x06, 0x0041, "PointerPoseOffset" }, { 0x07, 0, "KeyboardKeypad" }, { 0x07, 0x0001, "ErrorRollOver" }, { 0x07, 
0x0002, "POSTFail" }, { 0x07, 0x0003, "ErrorUndefined" }, { 0x07, 0x0004, "KeyboardA" }, { 0x07, 0x0005, "KeyboardB" }, { 0x07, 0x0006, "KeyboardC" }, { 0x07, 0x0007, "KeyboardD" }, { 0x07, 0x0008, "KeyboardE" }, { 0x07, 0x0009, "KeyboardF" }, { 0x07, 0x000a, "KeyboardG" }, { 0x07, 0x000b, "KeyboardH" }, { 0x07, 0x000c, "KeyboardI" }, { 0x07, 0x000d, "KeyboardJ" }, { 0x07, 0x000e, "KeyboardK" }, { 0x07, 0x000f, "KeyboardL" }, { 0x07, 0x0010, "KeyboardM" }, { 0x07, 0x0011, "KeyboardN" }, { 0x07, 0x0012, "KeyboardO" }, { 0x07, 0x0013, "KeyboardP" }, { 0x07, 0x0014, "KeyboardQ" }, { 0x07, 0x0015, "KeyboardR" }, { 0x07, 0x0016, "KeyboardS" }, { 0x07, 0x0017, "KeyboardT" }, { 0x07, 0x0018, "KeyboardU" }, { 0x07, 0x0019, "KeyboardV" }, { 0x07, 0x001a, "KeyboardW" }, { 0x07, 0x001b, "KeyboardX" }, { 0x07, 0x001c, "KeyboardY" }, { 0x07, 0x001d, "KeyboardZ" }, { 0x07, 0x001e, "Keyboard1andBang" }, { 0x07, 0x001f, "Keyboard2andAt" }, { 0x07, 0x0020, "Keyboard3andHash" }, { 0x07, 0x0021, "Keyboard4andDollar" }, { 0x07, 0x0022, "Keyboard5andPercent" }, { 0x07, 0x0023, "Keyboard6andCaret" }, { 0x07, 0x0024, "Keyboard7andAmpersand" }, { 0x07, 0x0025, "Keyboard8andStar" }, { 0x07, 0x0026, "Keyboard9andLeftBracket" }, { 0x07, 0x0027, "Keyboard0andRightBracket" }, { 0x07, 0x0028, "KeyboardReturnEnter" }, { 0x07, 0x0029, "KeyboardEscape" }, { 0x07, 0x002a, "KeyboardDelete" }, { 0x07, 0x002b, "KeyboardTab" }, { 0x07, 0x002c, "KeyboardSpacebar" }, { 0x07, 0x002d, "KeyboardDashandUnderscore" }, { 0x07, 0x002e, "KeyboardEqualsandPlus" }, { 0x07, 0x002f, "KeyboardLeftBrace" }, { 0x07, 0x0030, "KeyboardRightBrace" }, { 0x07, 0x0031, "KeyboardBackslashandPipe" }, { 0x07, 0x0032, "KeyboardNonUSHashandTilde" }, { 0x07, 0x0033, "KeyboardSemiColonandColon" }, { 0x07, 0x0034, "KeyboardLeftAposandDouble" }, { 0x07, 0x0035, "KeyboardGraveAccentandTilde" }, { 0x07, 0x0036, "KeyboardCommaandLessThan" }, { 0x07, 0x0037, "KeyboardPeriodandGreaterThan" }, { 0x07, 0x0038, "KeyboardForwardSlashandQuestionMark" }, { 0x07, 0x0039, "KeyboardCapsLock" }, { 0x07, 0x003a, "KeyboardF1" }, { 0x07, 0x003b, "KeyboardF2" }, { 0x07, 0x003c, "KeyboardF3" }, { 0x07, 0x003d, "KeyboardF4" }, { 0x07, 0x003e, "KeyboardF5" }, { 0x07, 0x003f, "KeyboardF6" }, { 0x07, 0x0040, "KeyboardF7" }, { 0x07, 0x0041, "KeyboardF8" }, { 0x07, 0x0042, "KeyboardF9" }, { 0x07, 0x0043, "KeyboardF10" }, { 0x07, 0x0044, "KeyboardF11" }, { 0x07, 0x0045, "KeyboardF12" }, { 0x07, 0x0046, "KeyboardPrintScreen" }, { 0x07, 0x0047, "KeyboardScrollLock" }, { 0x07, 0x0048, "KeyboardPause" }, { 0x07, 0x0049, "KeyboardInsert" }, { 0x07, 0x004a, "KeyboardHome" }, { 0x07, 0x004b, "KeyboardPageUp" }, { 0x07, 0x004c, "KeyboardDeleteForward" }, { 0x07, 0x004d, "KeyboardEnd" }, { 0x07, 0x004e, "KeyboardPageDown" }, { 0x07, 0x004f, "KeyboardRightArrow" }, { 0x07, 0x0050, "KeyboardLeftArrow" }, { 0x07, 0x0051, "KeyboardDownArrow" }, { 0x07, 0x0052, "KeyboardUpArrow" }, { 0x07, 0x0053, "KeypadNumLockandClear" }, { 0x07, 0x0054, "KeypadForwardSlash" }, { 0x07, 0x0055, "KeypadStar" }, { 0x07, 0x0056, "KeypadDash" }, { 0x07, 0x0057, "KeypadPlus" }, { 0x07, 0x0058, "KeypadENTER" }, { 0x07, 0x0059, "Keypad1andEnd" }, { 0x07, 0x005a, "Keypad2andDownArrow" }, { 0x07, 0x005b, "Keypad3andPageDn" }, { 0x07, 0x005c, "Keypad4andLeftArrow" }, { 0x07, 0x005d, "Keypad5" }, { 0x07, 0x005e, "Keypad6andRightArrow" }, { 0x07, 0x005f, "Keypad7andHome" }, { 0x07, 0x0060, "Keypad8andUpArrow" }, { 0x07, 0x0061, "Keypad9andPageUp" }, { 0x07, 0x0062, "Keypad0andInsert" }, { 0x07, 0x0063, "KeypadPeriodandDelete" 
}, { 0x07, 0x0064, "KeyboardNonUSBackslashandPipe" }, { 0x07, 0x0065, "KeyboardApplication" }, { 0x07, 0x0066, "KeyboardPower" }, { 0x07, 0x0067, "KeypadEquals" }, { 0x07, 0x0068, "KeyboardF13" }, { 0x07, 0x0069, "KeyboardF14" }, { 0x07, 0x006a, "KeyboardF15" }, { 0x07, 0x006b, "KeyboardF16" }, { 0x07, 0x006c, "KeyboardF17" }, { 0x07, 0x006d, "KeyboardF18" }, { 0x07, 0x006e, "KeyboardF19" }, { 0x07, 0x006f, "KeyboardF20" }, { 0x07, 0x0070, "KeyboardF21" }, { 0x07, 0x0071, "KeyboardF22" }, { 0x07, 0x0072, "KeyboardF23" }, { 0x07, 0x0073, "KeyboardF24" }, { 0x07, 0x0074, "KeyboardExecute" }, { 0x07, 0x0075, "KeyboardHelp" }, { 0x07, 0x0076, "KeyboardMenu" }, { 0x07, 0x0077, "KeyboardSelect" }, { 0x07, 0x0078, "KeyboardStop" }, { 0x07, 0x0079, "KeyboardAgain" }, { 0x07, 0x007a, "KeyboardUndo" }, { 0x07, 0x007b, "KeyboardCut" }, { 0x07, 0x007c, "KeyboardCopy" }, { 0x07, 0x007d, "KeyboardPaste" }, { 0x07, 0x007e, "KeyboardFind" }, { 0x07, 0x007f, "KeyboardMute" }, { 0x07, 0x0080, "KeyboardVolumeUp" }, { 0x07, 0x0081, "KeyboardVolumeDown" }, { 0x07, 0x0082, "KeyboardLockingCapsLock" }, { 0x07, 0x0083, "KeyboardLockingNumLock" }, { 0x07, 0x0084, "KeyboardLockingScrollLock" }, { 0x07, 0x0085, "KeypadComma" }, { 0x07, 0x0086, "KeypadEqualSign" }, { 0x07, 0x0087, "KeyboardInternational1" }, { 0x07, 0x0088, "KeyboardInternational2" }, { 0x07, 0x0089, "KeyboardInternational3" }, { 0x07, 0x008a, "KeyboardInternational4" }, { 0x07, 0x008b, "KeyboardInternational5" }, { 0x07, 0x008c, "KeyboardInternational6" }, { 0x07, 0x008d, "KeyboardInternational7" }, { 0x07, 0x008e, "KeyboardInternational8" }, { 0x07, 0x008f, "KeyboardInternational9" }, { 0x07, 0x0090, "KeyboardLANG1" }, { 0x07, 0x0091, "KeyboardLANG2" }, { 0x07, 0x0092, "KeyboardLANG3" }, { 0x07, 0x0093, "KeyboardLANG4" }, { 0x07, 0x0094, "KeyboardLANG5" }, { 0x07, 0x0095, "KeyboardLANG6" }, { 0x07, 0x0096, "KeyboardLANG7" }, { 0x07, 0x0097, "KeyboardLANG8" }, { 0x07, 0x0098, "KeyboardLANG9" }, { 0x07, 0x0099, "KeyboardAlternateErase" }, { 0x07, 0x009a, "KeyboardSysReqAttention" }, { 0x07, 0x009b, "KeyboardCancel" }, { 0x07, 0x009c, "KeyboardClear" }, { 0x07, 0x009d, "KeyboardPrior" }, { 0x07, 0x009e, "KeyboardReturn" }, { 0x07, 0x009f, "KeyboardSeparator" }, { 0x07, 0x00a0, "KeyboardOut" }, { 0x07, 0x00a1, "KeyboardOper" }, { 0x07, 0x00a2, "KeyboardClearAgain" }, { 0x07, 0x00a3, "KeyboardCrSelProps" }, { 0x07, 0x00a4, "KeyboardExSel" }, { 0x07, 0x00b0, "KeypadDouble0" }, { 0x07, 0x00b1, "KeypadTriple0" }, { 0x07, 0x00b2, "ThousandsSeparator" }, { 0x07, 0x00b3, "DecimalSeparator" }, { 0x07, 0x00b4, "CurrencyUnit" }, { 0x07, 0x00b5, "CurrencySubunit" }, { 0x07, 0x00b6, "KeypadLeftBracket" }, { 0x07, 0x00b7, "KeypadRightBracket" }, { 0x07, 0x00b8, "KeypadLeftBrace" }, { 0x07, 0x00b9, "KeypadRightBrace" }, { 0x07, 0x00ba, "KeypadTab" }, { 0x07, 0x00bb, "KeypadBackspace" }, { 0x07, 0x00bc, "KeypadA" }, { 0x07, 0x00bd, "KeypadB" }, { 0x07, 0x00be, "KeypadC" }, { 0x07, 0x00bf, "KeypadD" }, { 0x07, 0x00c0, "KeypadE" }, { 0x07, 0x00c1, "KeypadF" }, { 0x07, 0x00c2, "KeypadXOR" }, { 0x07, 0x00c3, "KeypadCaret" }, { 0x07, 0x00c4, "KeypadPercentage" }, { 0x07, 0x00c5, "KeypadLess" }, { 0x07, 0x00c6, "KeypadGreater" }, { 0x07, 0x00c7, "KeypadAmpersand" }, { 0x07, 0x00c8, "KeypadDoubleAmpersand" }, { 0x07, 0x00c9, "KeypadBar" }, { 0x07, 0x00ca, "KeypadDoubleBar" }, { 0x07, 0x00cb, "KeypadColon" }, { 0x07, 0x00cc, "KeypadHash" }, { 0x07, 0x00cd, "KeypadSpace" }, { 0x07, 0x00ce, "KeypadAt" }, { 0x07, 0x00cf, "KeypadBang" }, { 0x07, 0x00d0, "KeypadMemoryStore" }, { 
0x07, 0x00d1, "KeypadMemoryRecall" }, { 0x07, 0x00d2, "KeypadMemoryClear" }, { 0x07, 0x00d3, "KeypadMemoryAdd" }, { 0x07, 0x00d4, "KeypadMemorySubtract" }, { 0x07, 0x00d5, "KeypadMemoryMultiply" }, { 0x07, 0x00d6, "KeypadMemoryDivide" }, { 0x07, 0x00d7, "KeypadPlusMinus" }, { 0x07, 0x00d8, "KeypadClear" }, { 0x07, 0x00d9, "KeypadClearEntry" }, { 0x07, 0x00da, "KeypadBinary" }, { 0x07, 0x00db, "KeypadOctal" }, { 0x07, 0x00dc, "KeypadDecimal" }, { 0x07, 0x00dd, "KeypadHexadecimal" }, { 0x07, 0x00e0, "KeyboardLeftControl" }, { 0x07, 0x00e1, "KeyboardLeftShift" }, { 0x07, 0x00e2, "KeyboardLeftAlt" }, { 0x07, 0x00e3, "KeyboardLeftGUI" }, { 0x07, 0x00e4, "KeyboardRightControl" }, { 0x07, 0x00e5, "KeyboardRightShift" }, { 0x07, 0x00e6, "KeyboardRightAlt" }, { 0x07, 0x00e7, "KeyboardRightGUI" }, { 0x08, 0, "LED" }, { 0x08, 0x0001, "NumLock" }, { 0x08, 0x0002, "CapsLock" }, { 0x08, 0x0003, "ScrollLock" }, { 0x08, 0x0004, "Compose" }, { 0x08, 0x0005, "Kana" }, { 0x08, 0x0006, "Power" }, { 0x08, 0x0007, "Shift" }, { 0x08, 0x0008, "DoNotDisturb" }, { 0x08, 0x0009, "Mute" }, { 0x08, 0x000a, "ToneEnable" }, { 0x08, 0x000b, "HighCutFilter" }, { 0x08, 0x000c, "LowCutFilter" }, { 0x08, 0x000d, "EqualizerEnable" }, { 0x08, 0x000e, "SoundFieldOn" }, { 0x08, 0x000f, "SurroundOn" }, { 0x08, 0x0010, "Repeat" }, { 0x08, 0x0011, "Stereo" }, { 0x08, 0x0012, "SamplingRateDetect" }, { 0x08, 0x0013, "Spinning" }, { 0x08, 0x0014, "CAV" }, { 0x08, 0x0015, "CLV" }, { 0x08, 0x0016, "RecordingFormatDetect" }, { 0x08, 0x0017, "OffHook" }, { 0x08, 0x0018, "Ring" }, { 0x08, 0x0019, "MessageWaiting" }, { 0x08, 0x001a, "DataMode" }, { 0x08, 0x001b, "BatteryOperation" }, { 0x08, 0x001c, "BatteryOK" }, { 0x08, 0x001d, "BatteryLow" }, { 0x08, 0x001e, "Speaker" }, { 0x08, 0x001f, "Headset" }, { 0x08, 0x0020, "Hold" }, { 0x08, 0x0021, "Microphone" }, { 0x08, 0x0022, "Coverage" }, { 0x08, 0x0023, "NightMode" }, { 0x08, 0x0024, "SendCalls" }, { 0x08, 0x0025, "CallPickup" }, { 0x08, 0x0026, "Conference" }, { 0x08, 0x0027, "Standby" }, { 0x08, 0x0028, "CameraOn" }, { 0x08, 0x0029, "CameraOff" }, { 0x08, 0x002a, "OnLine" }, { 0x08, 0x002b, "OffLine" }, { 0x08, 0x002c, "Busy" }, { 0x08, 0x002d, "Ready" }, { 0x08, 0x002e, "PaperOut" }, { 0x08, 0x002f, "PaperJam" }, { 0x08, 0x0030, "Remote" }, { 0x08, 0x0031, "Forward" }, { 0x08, 0x0032, "Reverse" }, { 0x08, 0x0033, "Stop" }, { 0x08, 0x0034, "Rewind" }, { 0x08, 0x0035, "FastForward" }, { 0x08, 0x0036, "Play" }, { 0x08, 0x0037, "Pause" }, { 0x08, 0x0038, "Record" }, { 0x08, 0x0039, "Error" }, { 0x08, 0x003a, "UsageSelectedIndicator" }, { 0x08, 0x003b, "UsageInUseIndicator" }, { 0x08, 0x003c, "UsageMultiModeIndicator" }, { 0x08, 0x003d, "IndicatorOn" }, { 0x08, 0x003e, "IndicatorFlash" }, { 0x08, 0x003f, "IndicatorSlowBlink" }, { 0x08, 0x0040, "IndicatorFastBlink" }, { 0x08, 0x0041, "IndicatorOff" }, { 0x08, 0x0042, "FlashOnTime" }, { 0x08, 0x0043, "SlowBlinkOnTime" }, { 0x08, 0x0044, "SlowBlinkOffTime" }, { 0x08, 0x0045, "FastBlinkOnTime" }, { 0x08, 0x0046, "FastBlinkOffTime" }, { 0x08, 0x0047, "UsageIndicatorColor" }, { 0x08, 0x0048, "IndicatorRed" }, { 0x08, 0x0049, "IndicatorGreen" }, { 0x08, 0x004a, "IndicatorAmber" }, { 0x08, 0x004b, "GenericIndicator" }, { 0x08, 0x004c, "SystemSuspend" }, { 0x08, 0x004d, "ExternalPowerConnected" }, { 0x08, 0x004e, "IndicatorBlue" }, { 0x08, 0x004f, "IndicatorOrange" }, { 0x08, 0x0050, "GoodStatus" }, { 0x08, 0x0051, "WarningStatus" }, { 0x08, 0x0052, "RGBLED" }, { 0x08, 0x0053, "RedLEDChannel" }, { 0x08, 0x0054, "BlueLEDChannel" }, { 0x08, 0x0055, 
"GreenLEDChannel" }, { 0x08, 0x0056, "LEDIntensity" }, { 0x08, 0x0057, "SystemMicrophoneMute" }, { 0x08, 0x0060, "PlayerIndicator" }, { 0x08, 0x0061, "Player1" }, { 0x08, 0x0062, "Player2" }, { 0x08, 0x0063, "Player3" }, { 0x08, 0x0064, "Player4" }, { 0x08, 0x0065, "Player5" }, { 0x08, 0x0066, "Player6" }, { 0x08, 0x0067, "Player7" }, { 0x08, 0x0068, "Player8" }, { 0x09, 0, "Button" }, { 0x0a, 0, "Ordinal" }, { 0x0b, 0, "TelephonyDevice" }, { 0x0b, 0x0001, "Phone" }, { 0x0b, 0x0002, "AnsweringMachine" }, { 0x0b, 0x0003, "MessageControls" }, { 0x0b, 0x0004, "Handset" }, { 0x0b, 0x0005, "Headset" }, { 0x0b, 0x0006, "TelephonyKeyPad" }, { 0x0b, 0x0007, "ProgrammableButton" }, { 0x0b, 0x0020, "HookSwitch" }, { 0x0b, 0x0021, "Flash" }, { 0x0b, 0x0022, "Feature" }, { 0x0b, 0x0023, "Hold" }, { 0x0b, 0x0024, "Redial" }, { 0x0b, 0x0025, "Transfer" }, { 0x0b, 0x0026, "Drop" }, { 0x0b, 0x0027, "Park" }, { 0x0b, 0x0028, "ForwardCalls" }, { 0x0b, 0x0029, "AlternateFunction" }, { 0x0b, 0x002a, "Line" }, { 0x0b, 0x002b, "SpeakerPhone" }, { 0x0b, 0x002c, "Conference" }, { 0x0b, 0x002d, "RingEnable" }, { 0x0b, 0x002e, "RingSelect" }, { 0x0b, 0x002f, "PhoneMute" }, { 0x0b, 0x0030, "CallerID" }, { 0x0b, 0x0031, "Send" }, { 0x0b, 0x0050, "SpeedDial" }, { 0x0b, 0x0051, "StoreNumber" }, { 0x0b, 0x0052, "RecallNumber" }, { 0x0b, 0x0053, "PhoneDirectory" }, { 0x0b, 0x0070, "VoiceMail" }, { 0x0b, 0x0071, "ScreenCalls" }, { 0x0b, 0x0072, "DoNotDisturb" }, { 0x0b, 0x0073, "Message" }, { 0x0b, 0x0074, "AnswerOnOff" }, { 0x0b, 0x0090, "InsideDialTone" }, { 0x0b, 0x0091, "OutsideDialTone" }, { 0x0b, 0x0092, "InsideRingTone" }, { 0x0b, 0x0093, "OutsideRingTone" }, { 0x0b, 0x0094, "PriorityRingTone" }, { 0x0b, 0x0095, "InsideRingback" }, { 0x0b, 0x0096, "PriorityRingback" }, { 0x0b, 0x0097, "LineBusyTone" }, { 0x0b, 0x0098, "ReorderTone" }, { 0x0b, 0x0099, "CallWaitingTone" }, { 0x0b, 0x009a, "ConfirmationTone1" }, { 0x0b, 0x009b, "ConfirmationTone2" }, { 0x0b, 0x009c, "TonesOff" }, { 0x0b, 0x009d, "OutsideRingback" }, { 0x0b, 0x009e, "Ringer" }, { 0x0b, 0x00b0, "PhoneKey0" }, { 0x0b, 0x00b1, "PhoneKey1" }, { 0x0b, 0x00b2, "PhoneKey2" }, { 0x0b, 0x00b3, "PhoneKey3" }, { 0x0b, 0x00b4, "PhoneKey4" }, { 0x0b, 0x00b5, "PhoneKey5" }, { 0x0b, 0x00b6, "PhoneKey6" }, { 0x0b, 0x00b7, "PhoneKey7" }, { 0x0b, 0x00b8, "PhoneKey8" }, { 0x0b, 0x00b9, "PhoneKey9" }, { 0x0b, 0x00ba, "PhoneKeyStar" }, { 0x0b, 0x00bb, "PhoneKeyPound" }, { 0x0b, 0x00bc, "PhoneKeyA" }, { 0x0b, 0x00bd, "PhoneKeyB" }, { 0x0b, 0x00be, "PhoneKeyC" }, { 0x0b, 0x00bf, "PhoneKeyD" }, { 0x0b, 0x00c0, "PhoneCallHistoryKey" }, { 0x0b, 0x00c1, "PhoneCallerIDKey" }, { 0x0b, 0x00c2, "PhoneSettingsKey" }, { 0x0b, 0x00f0, "HostControl" }, { 0x0b, 0x00f1, "HostAvailable" }, { 0x0b, 0x00f2, "HostCallActive" }, { 0x0b, 0x00f3, "ActivateHandsetAudio" }, { 0x0b, 0x00f4, "RingType" }, { 0x0b, 0x00f5, "RedialablePhoneNumber" }, { 0x0b, 0x00f8, "StopRingTone" }, { 0x0b, 0x00f9, "PSTNRingTone" }, { 0x0b, 0x00fa, "HostRingTone" }, { 0x0b, 0x00fb, "AlertSoundError" }, { 0x0b, 0x00fc, "AlertSoundConfirm" }, { 0x0b, 0x00fd, "AlertSoundNotification" }, { 0x0b, 0x00fe, "SilentRing" }, { 0x0b, 0x0108, "EmailMessageWaiting" }, { 0x0b, 0x0109, "VoicemailMessageWaiting" }, { 0x0b, 0x010a, "HostHold" }, { 0x0b, 0x0110, "IncomingCallHistoryCount" }, { 0x0b, 0x0111, "OutgoingCallHistoryCount" }, { 0x0b, 0x0112, "IncomingCallHistory" }, { 0x0b, 0x0113, "OutgoingCallHistory" }, { 0x0b, 0x0114, "PhoneLocale" }, { 0x0b, 0x0140, "PhoneTimeSecond" }, { 0x0b, 0x0141, "PhoneTimeMinute" }, { 0x0b, 
0x0142, "PhoneTimeHour" }, { 0x0b, 0x0143, "PhoneDateDay" }, { 0x0b, 0x0144, "PhoneDateMonth" }, { 0x0b, 0x0145, "PhoneDateYear" }, { 0x0b, 0x0146, "HandsetNickname" }, { 0x0b, 0x0147, "AddressBookID" }, { 0x0b, 0x014a, "CallDuration" }, { 0x0b, 0x014b, "DualModePhone" }, { 0x0c, 0, "Consumer" }, { 0x0c, 0x0001, "ConsumerControl" }, { 0x0c, 0x0002, "NumericKeyPad" }, { 0x0c, 0x0003, "ProgrammableButtons" }, { 0x0c, 0x0004, "Microphone" }, { 0x0c, 0x0005, "Headphone" }, { 0x0c, 0x0006, "GraphicEqualizer" }, { 0x0c, 0x0020, "10" }, { 0x0c, 0x0021, "100" }, { 0x0c, 0x0022, "AMPM" }, { 0x0c, 0x0030, "Power" }, { 0x0c, 0x0031, "Reset" }, { 0x0c, 0x0032, "Sleep" }, { 0x0c, 0x0033, "SleepAfter" }, { 0x0c, 0x0034, "SleepMode" }, { 0x0c, 0x0035, "Illumination" }, { 0x0c, 0x0036, "FunctionButtons" }, { 0x0c, 0x0040, "Menu" }, { 0x0c, 0x0041, "MenuPick" }, { 0x0c, 0x0042, "MenuUp" }, { 0x0c, 0x0043, "MenuDown" }, { 0x0c, 0x0044, "MenuLeft" }, { 0x0c, 0x0045, "MenuRight" }, { 0x0c, 0x0046, "MenuEscape" }, { 0x0c, 0x0047, "MenuValueIncrease" }, { 0x0c, 0x0048, "MenuValueDecrease" }, { 0x0c, 0x0060, "DataOnScreen" }, { 0x0c, 0x0061, "ClosedCaption" }, { 0x0c, 0x0062, "ClosedCaptionSelect" }, { 0x0c, 0x0063, "VCRTV" }, { 0x0c, 0x0064, "BroadcastMode" }, { 0x0c, 0x0065, "Snapshot" }, { 0x0c, 0x0066, "Still" }, { 0x0c, 0x0067, "PictureinPictureToggle" }, { 0x0c, 0x0068, "PictureinPictureSwap" }, { 0x0c, 0x0069, "RedMenuButton" }, { 0x0c, 0x006a, "GreenMenuButton" }, { 0x0c, 0x006b, "BlueMenuButton" }, { 0x0c, 0x006c, "YellowMenuButton" }, { 0x0c, 0x006d, "Aspect" }, { 0x0c, 0x006e, "3DModeSelect" }, { 0x0c, 0x006f, "DisplayBrightnessIncrement" }, { 0x0c, 0x0070, "DisplayBrightnessDecrement" }, { 0x0c, 0x0071, "DisplayBrightness" }, { 0x0c, 0x0072, "DisplayBacklightToggle" }, { 0x0c, 0x0073, "DisplaySetBrightnesstoMinimum" }, { 0x0c, 0x0074, "DisplaySetBrightnesstoMaximum" }, { 0x0c, 0x0075, "DisplaySetAutoBrightness" }, { 0x0c, 0x0076, "CameraAccessEnabled" }, { 0x0c, 0x0077, "CameraAccessDisabled" }, { 0x0c, 0x0078, "CameraAccessToggle" }, { 0x0c, 0x0079, "KeyboardBrightnessIncrement" }, { 0x0c, 0x007a, "KeyboardBrightnessDecrement" }, { 0x0c, 0x007b, "KeyboardBacklightSetLevel" }, { 0x0c, 0x007c, "KeyboardBacklightOOC" }, { 0x0c, 0x007d, "KeyboardBacklightSetMinimum" }, { 0x0c, 0x007e, "KeyboardBacklightSetMaximum" }, { 0x0c, 0x007f, "KeyboardBacklightAuto" }, { 0x0c, 0x0080, "Selection" }, { 0x0c, 0x0081, "AssignSelection" }, { 0x0c, 0x0082, "ModeStep" }, { 0x0c, 0x0083, "RecallLast" }, { 0x0c, 0x0084, "EnterChannel" }, { 0x0c, 0x0085, "OrderMovie" }, { 0x0c, 0x0086, "Channel" }, { 0x0c, 0x0087, "MediaSelection" }, { 0x0c, 0x0088, "MediaSelectComputer" }, { 0x0c, 0x0089, "MediaSelectTV" }, { 0x0c, 0x008a, "MediaSelectWWW" }, { 0x0c, 0x008b, "MediaSelectDVD" }, { 0x0c, 0x008c, "MediaSelectTelephone" }, { 0x0c, 0x008d, "MediaSelectProgramGuide" }, { 0x0c, 0x008e, "MediaSelectVideoPhone" }, { 0x0c, 0x008f, "MediaSelectGames" }, { 0x0c, 0x0090, "MediaSelectMessages" }, { 0x0c, 0x0091, "MediaSelectCD" }, { 0x0c, 0x0092, "MediaSelectVCR" }, { 0x0c, 0x0093, "MediaSelectTuner" }, { 0x0c, 0x0094, "Quit" }, { 0x0c, 0x0095, "Help" }, { 0x0c, 0x0096, "MediaSelectTape" }, { 0x0c, 0x0097, "MediaSelectCable" }, { 0x0c, 0x0098, "MediaSelectSatellite" }, { 0x0c, 0x0099, "MediaSelectSecurity" }, { 0x0c, 0x009a, "MediaSelectHome" }, { 0x0c, 0x009b, "MediaSelectCall" }, { 0x0c, 0x009c, "ChannelIncrement" }, { 0x0c, 0x009d, "ChannelDecrement" }, { 0x0c, 0x009e, "MediaSelectSAP" }, { 0x0c, 0x00a0, "VCRPlus" }, { 0x0c, 
0x00a1, "Once" }, { 0x0c, 0x00a2, "Daily" }, { 0x0c, 0x00a3, "Weekly" }, { 0x0c, 0x00a4, "Monthly" }, { 0x0c, 0x00b0, "Play" }, { 0x0c, 0x00b1, "Pause" }, { 0x0c, 0x00b2, "Record" }, { 0x0c, 0x00b3, "FastForward" }, { 0x0c, 0x00b4, "Rewind" }, { 0x0c, 0x00b5, "ScanNextTrack" }, { 0x0c, 0x00b6, "ScanPreviousTrack" }, { 0x0c, 0x00b7, "Stop" }, { 0x0c, 0x00b8, "Eject" }, { 0x0c, 0x00b9, "RandomPlay" }, { 0x0c, 0x00ba, "SelectDisc" }, { 0x0c, 0x00bb, "EnterDisc" }, { 0x0c, 0x00bc, "Repeat" }, { 0x0c, 0x00bd, "Tracking" }, { 0x0c, 0x00be, "TrackNormal" }, { 0x0c, 0x00bf, "SlowTracking" }, { 0x0c, 0x00c0, "FrameForward" }, { 0x0c, 0x00c1, "FrameBack" }, { 0x0c, 0x00c2, "Mark" }, { 0x0c, 0x00c3, "ClearMark" }, { 0x0c, 0x00c4, "RepeatFromMark" }, { 0x0c, 0x00c5, "ReturnToMark" }, { 0x0c, 0x00c6, "SearchMarkForward" }, { 0x0c, 0x00c7, "SearchMarkBackwards" }, { 0x0c, 0x00c8, "CounterReset" }, { 0x0c, 0x00c9, "ShowCounter" }, { 0x0c, 0x00ca, "TrackingIncrement" }, { 0x0c, 0x00cb, "TrackingDecrement" }, { 0x0c, 0x00cc, "StopEject" }, { 0x0c, 0x00cd, "PlayPause" }, { 0x0c, 0x00ce, "PlaySkip" }, { 0x0c, 0x00cf, "VoiceCommand" }, { 0x0c, 0x00d0, "InvokeCaptureInterface" }, { 0x0c, 0x00d1, "StartorStopGameRecording" }, { 0x0c, 0x00d2, "HistoricalGameCapture" }, { 0x0c, 0x00d3, "CaptureGameScreenshot" }, { 0x0c, 0x00d4, "ShoworHideRecordingIndicator" }, { 0x0c, 0x00d5, "StartorStopMicrophoneCapture" }, { 0x0c, 0x00d6, "StartorStopCameraCapture" }, { 0x0c, 0x00d7, "StartorStopGameBroadcast" }, { 0x0c, 0x00d8, "StartorStopVoiceDictationSession" }, { 0x0c, 0x00d9, "InvokeDismissEmojiPicker" }, { 0x0c, 0x00e0, "Volume" }, { 0x0c, 0x00e1, "Balance" }, { 0x0c, 0x00e2, "Mute" }, { 0x0c, 0x00e3, "Bass" }, { 0x0c, 0x00e4, "Treble" }, { 0x0c, 0x00e5, "BassBoost" }, { 0x0c, 0x00e6, "SurroundMode" }, { 0x0c, 0x00e7, "Loudness" }, { 0x0c, 0x00e8, "MPX" }, { 0x0c, 0x00e9, "VolumeIncrement" }, { 0x0c, 0x00ea, "VolumeDecrement" }, { 0x0c, 0x00f0, "SpeedSelect" }, { 0x0c, 0x00f1, "PlaybackSpeed" }, { 0x0c, 0x00f2, "StandardPlay" }, { 0x0c, 0x00f3, "LongPlay" }, { 0x0c, 0x00f4, "ExtendedPlay" }, { 0x0c, 0x00f5, "Slow" }, { 0x0c, 0x0100, "FanEnable" }, { 0x0c, 0x0101, "FanSpeed" }, { 0x0c, 0x0102, "LightEnable" }, { 0x0c, 0x0103, "LightIlluminationLevel" }, { 0x0c, 0x0104, "ClimateControlEnable" }, { 0x0c, 0x0105, "RoomTemperature" }, { 0x0c, 0x0106, "SecurityEnable" }, { 0x0c, 0x0107, "FireAlarm" }, { 0x0c, 0x0108, "PoliceAlarm" }, { 0x0c, 0x0109, "Proximity" }, { 0x0c, 0x010a, "Motion" }, { 0x0c, 0x010b, "DuressAlarm" }, { 0x0c, 0x010c, "HoldupAlarm" }, { 0x0c, 0x010d, "MedicalAlarm" }, { 0x0c, 0x0150, "BalanceRight" }, { 0x0c, 0x0151, "BalanceLeft" }, { 0x0c, 0x0152, "BassIncrement" }, { 0x0c, 0x0153, "BassDecrement" }, { 0x0c, 0x0154, "TrebleIncrement" }, { 0x0c, 0x0155, "TrebleDecrement" }, { 0x0c, 0x0160, "SpeakerSystem" }, { 0x0c, 0x0161, "ChannelLeft" }, { 0x0c, 0x0162, "ChannelRight" }, { 0x0c, 0x0163, "ChannelCenter" }, { 0x0c, 0x0164, "ChannelFront" }, { 0x0c, 0x0165, "ChannelCenterFront" }, { 0x0c, 0x0166, "ChannelSide" }, { 0x0c, 0x0167, "ChannelSurround" }, { 0x0c, 0x0168, "ChannelLowFrequencyEnhancement" }, { 0x0c, 0x0169, "ChannelTop" }, { 0x0c, 0x016a, "ChannelUnknown" }, { 0x0c, 0x0170, "Subchannel" }, { 0x0c, 0x0171, "SubchannelIncrement" }, { 0x0c, 0x0172, "SubchannelDecrement" }, { 0x0c, 0x0173, "AlternateAudioIncrement" }, { 0x0c, 0x0174, "AlternateAudioDecrement" }, { 0x0c, 0x0180, "ApplicationLaunchButtons" }, { 0x0c, 0x0181, "ALLaunchButtonConfigurationTool" }, { 0x0c, 0x0182, 
"ALProgrammableButtonConfiguration" }, { 0x0c, 0x0183, "ALConsumerControlConfiguration" }, { 0x0c, 0x0184, "ALWordProcessor" }, { 0x0c, 0x0185, "ALTextEditor" }, { 0x0c, 0x0186, "ALSpreadsheet" }, { 0x0c, 0x0187, "ALGraphicsEditor" }, { 0x0c, 0x0188, "ALPresentationApp" }, { 0x0c, 0x0189, "ALDatabaseApp" }, { 0x0c, 0x018a, "ALEmailReader" }, { 0x0c, 0x018b, "ALNewsreader" }, { 0x0c, 0x018c, "ALVoicemail" }, { 0x0c, 0x018d, "ALContactsAddressBook" }, { 0x0c, 0x018e, "ALCalendarSchedule" }, { 0x0c, 0x018f, "ALTaskProjectManager" }, { 0x0c, 0x0190, "ALLogJournalTimecard" }, { 0x0c, 0x0191, "ALCheckbookFinance" }, { 0x0c, 0x0192, "ALCalculator" }, { 0x0c, 0x0193, "ALAVCapturePlayback" }, { 0x0c, 0x0194, "ALLocalMachineBrowser" }, { 0x0c, 0x0195, "ALLANWANBrowser" }, { 0x0c, 0x0196, "ALInternetBrowser" }, { 0x0c, 0x0197, "ALRemoteNetworkingISPConnect" }, { 0x0c, 0x0198, "ALNetworkConference" }, { 0x0c, 0x0199, "ALNetworkChat" }, { 0x0c, 0x019a, "ALTelephonyDialer" }, { 0x0c, 0x019b, "ALLogon" }, { 0x0c, 0x019c, "ALLogoff" }, { 0x0c, 0x019d, "ALLogonLogoff" }, { 0x0c, 0x019e, "ALTerminalLockScreensaver" }, { 0x0c, 0x019f, "ALControlPanel" }, { 0x0c, 0x01a0, "ALCommandLineProcessorRun" }, { 0x0c, 0x01a1, "ALProcessTaskManager" }, { 0x0c, 0x01a2, "ALSelectTaskApplication" }, { 0x0c, 0x01a3, "ALNextTaskApplication" }, { 0x0c, 0x01a4, "ALPreviousTaskApplication" }, { 0x0c, 0x01a5, "ALPreemptiveHaltTaskApplication" }, { 0x0c, 0x01a6, "ALIntegratedHelpCenter" }, { 0x0c, 0x01a7, "ALDocuments" }, { 0x0c, 0x01a8, "ALThesaurus" }, { 0x0c, 0x01a9, "ALDictionary" }, { 0x0c, 0x01aa, "ALDesktop" }, { 0x0c, 0x01ab, "ALSpellCheck" }, { 0x0c, 0x01ac, "ALGrammarCheck" }, { 0x0c, 0x01ad, "ALWirelessStatus" }, { 0x0c, 0x01ae, "ALKeyboardLayout" }, { 0x0c, 0x01af, "ALVirusProtection" }, { 0x0c, 0x01b0, "ALEncryption" }, { 0x0c, 0x01b1, "ALScreenSaver" }, { 0x0c, 0x01b2, "ALAlarms" }, { 0x0c, 0x01b3, "ALClock" }, { 0x0c, 0x01b4, "ALFileBrowser" }, { 0x0c, 0x01b5, "ALPowerStatus" }, { 0x0c, 0x01b6, "ALImageBrowser" }, { 0x0c, 0x01b7, "ALAudioBrowser" }, { 0x0c, 0x01b8, "ALMovieBrowser" }, { 0x0c, 0x01b9, "ALDigitalRightsManager" }, { 0x0c, 0x01ba, "ALDigitalWallet" }, { 0x0c, 0x01bc, "ALInstantMessaging" }, { 0x0c, 0x01bd, "ALOEMFeaturesTipsTutorialBrowser" }, { 0x0c, 0x01be, "ALOEMHelp" }, { 0x0c, 0x01bf, "ALOnlineCommunity" }, { 0x0c, 0x01c0, "ALEntertainmentContentBrowser" }, { 0x0c, 0x01c1, "ALOnlineShoppingBrowser" }, { 0x0c, 0x01c2, "ALSmartCardInformationHelp" }, { 0x0c, 0x01c3, "ALMarketMonitorFinanceBrowser" }, { 0x0c, 0x01c4, "ALCustomizedCorporateNewsBrowser" }, { 0x0c, 0x01c5, "ALOnlineActivityBrowser" }, { 0x0c, 0x01c6, "ALResearchSearchBrowser" }, { 0x0c, 0x01c7, "ALAudioPlayer" }, { 0x0c, 0x01c8, "ALMessageStatus" }, { 0x0c, 0x01c9, "ALContactSync" }, { 0x0c, 0x01ca, "ALNavigation" }, { 0x0c, 0x01cb, "ALContextawareDesktopAssistant" }, { 0x0c, 0x0200, "GenericGUIApplicationControls" }, { 0x0c, 0x0201, "ACNew" }, { 0x0c, 0x0202, "ACOpen" }, { 0x0c, 0x0203, "ACClose" }, { 0x0c, 0x0204, "ACExit" }, { 0x0c, 0x0205, "ACMaximize" }, { 0x0c, 0x0206, "ACMinimize" }, { 0x0c, 0x0207, "ACSave" }, { 0x0c, 0x0208, "ACPrint" }, { 0x0c, 0x0209, "ACProperties" }, { 0x0c, 0x021a, "ACUndo" }, { 0x0c, 0x021b, "ACCopy" }, { 0x0c, 0x021c, "ACCut" }, { 0x0c, 0x021d, "ACPaste" }, { 0x0c, 0x021e, "ACSelectAll" }, { 0x0c, 0x021f, "ACFind" }, { 0x0c, 0x0220, "ACFindandReplace" }, { 0x0c, 0x0221, "ACSearch" }, { 0x0c, 0x0222, "ACGoTo" }, { 0x0c, 0x0223, "ACHome" }, { 0x0c, 0x0224, "ACBack" }, { 0x0c, 0x0225, "ACForward" }, { 
0x0c, 0x0226, "ACStop" }, { 0x0c, 0x0227, "ACRefresh" }, { 0x0c, 0x0228, "ACPreviousLink" }, { 0x0c, 0x0229, "ACNextLink" }, { 0x0c, 0x022a, "ACBookmarks" }, { 0x0c, 0x022b, "ACHistory" }, { 0x0c, 0x022c, "ACSubscriptions" }, { 0x0c, 0x022d, "ACZoomIn" }, { 0x0c, 0x022e, "ACZoomOut" }, { 0x0c, 0x022f, "ACZoom" }, { 0x0c, 0x0230, "ACFullScreenView" }, { 0x0c, 0x0231, "ACNormalView" }, { 0x0c, 0x0232, "ACViewToggle" }, { 0x0c, 0x0233, "ACScrollUp" }, { 0x0c, 0x0234, "ACScrollDown" }, { 0x0c, 0x0235, "ACScroll" }, { 0x0c, 0x0236, "ACPanLeft" }, { 0x0c, 0x0237, "ACPanRight" }, { 0x0c, 0x0238, "ACPan" }, { 0x0c, 0x0239, "ACNewWindow" }, { 0x0c, 0x023a, "ACTileHorizontally" }, { 0x0c, 0x023b, "ACTileVertically" }, { 0x0c, 0x023c, "ACFormat" }, { 0x0c, 0x023d, "ACEdit" }, { 0x0c, 0x023e, "ACBold" }, { 0x0c, 0x023f, "ACItalics" }, { 0x0c, 0x0240, "ACUnderline" }, { 0x0c, 0x0241, "ACStrikethrough" }, { 0x0c, 0x0242, "ACSubscript" }, { 0x0c, 0x0243, "ACSuperscript" }, { 0x0c, 0x0244, "ACAllCaps" }, { 0x0c, 0x0245, "ACRotate" }, { 0x0c, 0x0246, "ACResize" }, { 0x0c, 0x0247, "ACFlipHorizontal" }, { 0x0c, 0x0248, "ACFlipVertical" }, { 0x0c, 0x0249, "ACMirrorHorizontal" }, { 0x0c, 0x024a, "ACMirrorVertical" }, { 0x0c, 0x024b, "ACFontSelect" }, { 0x0c, 0x024c, "ACFontColor" }, { 0x0c, 0x024d, "ACFontSize" }, { 0x0c, 0x024e, "ACJustifyLeft" }, { 0x0c, 0x024f, "ACJustifyCenterH" }, { 0x0c, 0x0250, "ACJustifyRight" }, { 0x0c, 0x0251, "ACJustifyBlockH" }, { 0x0c, 0x0252, "ACJustifyTop" }, { 0x0c, 0x0253, "ACJustifyCenterV" }, { 0x0c, 0x0254, "ACJustifyBottom" }, { 0x0c, 0x0255, "ACJustifyBlockV" }, { 0x0c, 0x0256, "ACIndentDecrease" }, { 0x0c, 0x0257, "ACIndentIncrease" }, { 0x0c, 0x0258, "ACNumberedList" }, { 0x0c, 0x0259, "ACRestartNumbering" }, { 0x0c, 0x025a, "ACBulletedList" }, { 0x0c, 0x025b, "ACPromote" }, { 0x0c, 0x025c, "ACDemote" }, { 0x0c, 0x025d, "ACYes" }, { 0x0c, 0x025e, "ACNo" }, { 0x0c, 0x025f, "ACCancel" }, { 0x0c, 0x0260, "ACCatalog" }, { 0x0c, 0x0261, "ACBuyCheckout" }, { 0x0c, 0x0262, "ACAddtoCart" }, { 0x0c, 0x0263, "ACExpand" }, { 0x0c, 0x0264, "ACExpandAll" }, { 0x0c, 0x0265, "ACCollapse" }, { 0x0c, 0x0266, "ACCollapseAll" }, { 0x0c, 0x0267, "ACPrintPreview" }, { 0x0c, 0x0268, "ACPasteSpecial" }, { 0x0c, 0x0269, "ACInsertMode" }, { 0x0c, 0x026a, "ACDelete" }, { 0x0c, 0x026b, "ACLock" }, { 0x0c, 0x026c, "ACUnlock" }, { 0x0c, 0x026d, "ACProtect" }, { 0x0c, 0x026e, "ACUnprotect" }, { 0x0c, 0x026f, "ACAttachComment" }, { 0x0c, 0x0270, "ACDeleteComment" }, { 0x0c, 0x0271, "ACViewComment" }, { 0x0c, 0x0272, "ACSelectWord" }, { 0x0c, 0x0273, "ACSelectSentence" }, { 0x0c, 0x0274, "ACSelectParagraph" }, { 0x0c, 0x0275, "ACSelectColumn" }, { 0x0c, 0x0276, "ACSelectRow" }, { 0x0c, 0x0277, "ACSelectTable" }, { 0x0c, 0x0278, "ACSelectObject" }, { 0x0c, 0x0279, "ACRedoRepeat" }, { 0x0c, 0x027a, "ACSort" }, { 0x0c, 0x027b, "ACSortAscending" }, { 0x0c, 0x027c, "ACSortDescending" }, { 0x0c, 0x027d, "ACFilter" }, { 0x0c, 0x027e, "ACSetClock" }, { 0x0c, 0x027f, "ACViewClock" }, { 0x0c, 0x0280, "ACSelectTimeZone" }, { 0x0c, 0x0281, "ACEditTimeZones" }, { 0x0c, 0x0282, "ACSetAlarm" }, { 0x0c, 0x0283, "ACClearAlarm" }, { 0x0c, 0x0284, "ACSnoozeAlarm" }, { 0x0c, 0x0285, "ACResetAlarm" }, { 0x0c, 0x0286, "ACSynchronize" }, { 0x0c, 0x0287, "ACSendReceive" }, { 0x0c, 0x0288, "ACSendTo" }, { 0x0c, 0x0289, "ACReply" }, { 0x0c, 0x028a, "ACReplyAll" }, { 0x0c, 0x028b, "ACForwardMsg" }, { 0x0c, 0x028c, "ACSend" }, { 0x0c, 0x028d, "ACAttachFile" }, { 0x0c, 0x028e, "ACUpload" }, { 0x0c, 0x028f, 
"ACDownloadSaveTargetAs" }, { 0x0c, 0x0290, "ACSetBorders" }, { 0x0c, 0x0291, "ACInsertRow" }, { 0x0c, 0x0292, "ACInsertColumn" }, { 0x0c, 0x0293, "ACInsertFile" }, { 0x0c, 0x0294, "ACInsertPicture" }, { 0x0c, 0x0295, "ACInsertObject" }, { 0x0c, 0x0296, "ACInsertSymbol" }, { 0x0c, 0x0297, "ACSaveandClose" }, { 0x0c, 0x0298, "ACRename" }, { 0x0c, 0x0299, "ACMerge" }, { 0x0c, 0x029a, "ACSplit" }, { 0x0c, 0x029b, "ACDisributeHorizontally" }, { 0x0c, 0x029c, "ACDistributeVertically" }, { 0x0c, 0x029d, "ACNextKeyboardLayoutSelect" }, { 0x0c, 0x029e, "ACNavigationGuidance" }, { 0x0c, 0x029f, "ACDesktopShowAllWindows" }, { 0x0c, 0x02a0, "ACSoftKeyLeft" }, { 0x0c, 0x02a1, "ACSoftKeyRight" }, { 0x0c, 0x02a2, "ACDesktopShowAllApplications" }, { 0x0c, 0x02b0, "ACIdleKeepAlive" }, { 0x0c, 0x02c0, "ExtendedKeyboardAttributesCollection" }, { 0x0c, 0x02c1, "KeyboardFormFactor" }, { 0x0c, 0x02c2, "KeyboardKeyType" }, { 0x0c, 0x02c3, "KeyboardPhysicalLayout" }, { 0x0c, 0x02c4, "VendorSpecificKeyboardPhysicalLayout" }, { 0x0c, 0x02c5, "KeyboardIETFLanguageTagIndex" }, { 0x0c, 0x02c6, "ImplementedKeyboardInputAssistControls" }, { 0x0c, 0x02c7, "KeyboardInputAssistPrevious" }, { 0x0c, 0x02c8, "KeyboardInputAssistNext" }, { 0x0c, 0x02c9, "KeyboardInputAssistPreviousGroup" }, { 0x0c, 0x02ca, "KeyboardInputAssistNextGroup" }, { 0x0c, 0x02cb, "KeyboardInputAssistAccept" }, { 0x0c, 0x02cc, "KeyboardInputAssistCancel" }, { 0x0c, 0x02d0, "PrivacyScreenToggle" }, { 0x0c, 0x02d1, "PrivacyScreenLevelDecrement" }, { 0x0c, 0x02d2, "PrivacyScreenLevelIncrement" }, { 0x0c, 0x02d3, "PrivacyScreenLevelMinimum" }, { 0x0c, 0x02d4, "PrivacyScreenLevelMaximum" }, { 0x0c, 0x0500, "ContactEdited" }, { 0x0c, 0x0501, "ContactAdded" }, { 0x0c, 0x0502, "ContactRecordActive" }, { 0x0c, 0x0503, "ContactIndex" }, { 0x0c, 0x0504, "ContactNickname" }, { 0x0c, 0x0505, "ContactFirstName" }, { 0x0c, 0x0506, "ContactLastName" }, { 0x0c, 0x0507, "ContactFullName" }, { 0x0c, 0x0508, "ContactPhoneNumberPersonal" }, { 0x0c, 0x0509, "ContactPhoneNumberBusiness" }, { 0x0c, 0x050a, "ContactPhoneNumberMobile" }, { 0x0c, 0x050b, "ContactPhoneNumberPager" }, { 0x0c, 0x050c, "ContactPhoneNumberFax" }, { 0x0c, 0x050d, "ContactPhoneNumberOther" }, { 0x0c, 0x050e, "ContactEmailPersonal" }, { 0x0c, 0x050f, "ContactEmailBusiness" }, { 0x0c, 0x0510, "ContactEmailOther" }, { 0x0c, 0x0511, "ContactEmailMain" }, { 0x0c, 0x0512, "ContactSpeedDialNumber" }, { 0x0c, 0x0513, "ContactStatusFlag" }, { 0x0c, 0x0514, "ContactMisc" }, { 0x0d, 0, "Digitizers" }, { 0x0d, 0x0001, "Digitizer" }, { 0x0d, 0x0002, "Pen" }, { 0x0d, 0x0003, "LightPen" }, { 0x0d, 0x0004, "TouchScreen" }, { 0x0d, 0x0005, "TouchPad" }, { 0x0d, 0x0006, "Whiteboard" }, { 0x0d, 0x0007, "CoordinateMeasuringMachine" }, { 0x0d, 0x0008, "3DDigitizer" }, { 0x0d, 0x0009, "StereoPlotter" }, { 0x0d, 0x000a, "ArticulatedArm" }, { 0x0d, 0x000b, "Armature" }, { 0x0d, 0x000c, "MultiplePointDigitizer" }, { 0x0d, 0x000d, "FreeSpaceWand" }, { 0x0d, 0x000e, "DeviceConfiguration" }, { 0x0d, 0x000f, "CapacitiveHeatMapDigitizer" }, { 0x0d, 0x0020, "Stylus" }, { 0x0d, 0x0021, "Puck" }, { 0x0d, 0x0022, "Finger" }, { 0x0d, 0x0023, "Devicesettings" }, { 0x0d, 0x0024, "CharacterGesture" }, { 0x0d, 0x0030, "TipPressure" }, { 0x0d, 0x0031, "BarrelPressure" }, { 0x0d, 0x0032, "InRange" }, { 0x0d, 0x0033, "Touch" }, { 0x0d, 0x0034, "Untouch" }, { 0x0d, 0x0035, "Tap" }, { 0x0d, 0x0036, "Quality" }, { 0x0d, 0x0037, "DataValid" }, { 0x0d, 0x0038, "TransducerIndex" }, { 0x0d, 0x0039, "TabletFunctionKeys" }, { 0x0d, 0x003a, 
"ProgramChangeKeys" }, { 0x0d, 0x003b, "BatteryStrength" }, { 0x0d, 0x003c, "Invert" }, { 0x0d, 0x003d, "XTilt" }, { 0x0d, 0x003e, "YTilt" }, { 0x0d, 0x003f, "Azimuth" }, { 0x0d, 0x0040, "Altitude" }, { 0x0d, 0x0041, "Twist" }, { 0x0d, 0x0042, "TipSwitch" }, { 0x0d, 0x0043, "SecondaryTipSwitch" }, { 0x0d, 0x0044, "BarrelSwitch" }, { 0x0d, 0x0045, "Eraser" }, { 0x0d, 0x0046, "TabletPick" }, { 0x0d, 0x0047, "TouchValid" }, { 0x0d, 0x0048, "Width" }, { 0x0d, 0x0049, "Height" }, { 0x0d, 0x0051, "ContactIdentifier" }, { 0x0d, 0x0052, "DeviceMode" }, { 0x0d, 0x0053, "DeviceIdentifier" }, { 0x0d, 0x0054, "ContactCount" }, { 0x0d, 0x0055, "ContactCountMaximum" }, { 0x0d, 0x0056, "ScanTime" }, { 0x0d, 0x0057, "SurfaceSwitch" }, { 0x0d, 0x0058, "ButtonSwitch" }, { 0x0d, 0x0059, "PadType" }, { 0x0d, 0x005a, "SecondaryBarrelSwitch" }, { 0x0d, 0x005b, "TransducerSerialNumber" }, { 0x0d, 0x005c, "PreferredColor" }, { 0x0d, 0x005d, "PreferredColorisLocked" }, { 0x0d, 0x005e, "PreferredLineWidth" }, { 0x0d, 0x005f, "PreferredLineWidthisLocked" }, { 0x0d, 0x0060, "LatencyMode" }, { 0x0d, 0x0061, "GestureCharacterQuality" }, { 0x0d, 0x0062, "CharacterGestureDataLength" }, { 0x0d, 0x0063, "CharacterGestureData" }, { 0x0d, 0x0064, "GestureCharacterEncoding" }, { 0x0d, 0x0065, "UTF8CharacterGestureEncoding" }, { 0x0d, 0x0066, "UTF16LittleEndianCharacterGestureEncoding" }, { 0x0d, 0x0067, "UTF16BigEndianCharacterGestureEncoding" }, { 0x0d, 0x0068, "UTF32LittleEndianCharacterGestureEncoding" }, { 0x0d, 0x0069, "UTF32BigEndianCharacterGestureEncoding" }, { 0x0d, 0x006a, "CapacitiveHeatMapProtocolVendorID" }, { 0x0d, 0x006b, "CapacitiveHeatMapProtocolVersion" }, { 0x0d, 0x006c, "CapacitiveHeatMapFrameData" }, { 0x0d, 0x006d, "GestureCharacterEnable" }, { 0x0d, 0x006e, "TransducerSerialNumberPart2" }, { 0x0d, 0x006f, "NoPreferredColor" }, { 0x0d, 0x0070, "PreferredLineStyle" }, { 0x0d, 0x0071, "PreferredLineStyleisLocked" }, { 0x0d, 0x0072, "Ink" }, { 0x0d, 0x0073, "Pencil" }, { 0x0d, 0x0074, "Highlighter" }, { 0x0d, 0x0075, "ChiselMarker" }, { 0x0d, 0x0076, "Brush" }, { 0x0d, 0x0077, "NoPreference" }, { 0x0d, 0x0080, "DigitizerDiagnostic" }, { 0x0d, 0x0081, "DigitizerError" }, { 0x0d, 0x0082, "ErrNormalStatus" }, { 0x0d, 0x0083, "ErrTransducersExceeded" }, { 0x0d, 0x0084, "ErrFullTransFeaturesUnavailable" }, { 0x0d, 0x0085, "ErrChargeLow" }, { 0x0d, 0x0090, "TransducerSoftwareInfo" }, { 0x0d, 0x0091, "TransducerVendorId" }, { 0x0d, 0x0092, "TransducerProductId" }, { 0x0d, 0x0093, "DeviceSupportedProtocols" }, { 0x0d, 0x0094, "TransducerSupportedProtocols" }, { 0x0d, 0x0095, "NoProtocol" }, { 0x0d, 0x0096, "WacomAESProtocol" }, { 0x0d, 0x0097, "USIProtocol" }, { 0x0d, 0x0098, "MicrosoftPenProtocol" }, { 0x0d, 0x00a0, "SupportedReportRates" }, { 0x0d, 0x00a1, "ReportRate" }, { 0x0d, 0x00a2, "TransducerConnected" }, { 0x0d, 0x00a3, "SwitchDisabled" }, { 0x0d, 0x00a4, "SwitchUnimplemented" }, { 0x0d, 0x00a5, "TransducerSwitches" }, { 0x0d, 0x00a6, "TransducerIndexSelector" }, { 0x0d, 0x00b0, "ButtonPressThreshold" }, { 0x0e, 0, "Haptics" }, { 0x0e, 0x0001, "SimpleHapticController" }, { 0x0e, 0x0010, "WaveformList" }, { 0x0e, 0x0011, "DurationList" }, { 0x0e, 0x0020, "AutoTrigger" }, { 0x0e, 0x0021, "ManualTrigger" }, { 0x0e, 0x0022, "AutoTriggerAssociatedControl" }, { 0x0e, 0x0023, "Intensity" }, { 0x0e, 0x0024, "RepeatCount" }, { 0x0e, 0x0025, "RetriggerPeriod" }, { 0x0e, 0x0026, "WaveformVendorPage" }, { 0x0e, 0x0027, "WaveformVendorID" }, { 0x0e, 0x0028, "WaveformCutoffTime" }, { 0x0e, 0x1001, "WaveformNone" }, { 
0x0e, 0x1002, "WaveformStop" }, { 0x0e, 0x1003, "WaveformClick" }, { 0x0e, 0x1004, "WaveformBuzzContinuous" }, { 0x0e, 0x1005, "WaveformRumbleContinuous" }, { 0x0e, 0x1006, "WaveformPress" }, { 0x0e, 0x1007, "WaveformRelease" }, { 0x0e, 0x1008, "WaveformHover" }, { 0x0e, 0x1009, "WaveformSuccess" }, { 0x0e, 0x100a, "WaveformError" }, { 0x0e, 0x100b, "WaveformInkContinuous" }, { 0x0e, 0x100c, "WaveformPencilContinuous" }, { 0x0e, 0x100d, "WaveformMarkerContinuous" }, { 0x0e, 0x100e, "WaveformChiselMarkerContinuous" }, { 0x0e, 0x100f, "WaveformBrushContinuous" }, { 0x0e, 0x1010, "WaveformEraserContinuous" }, { 0x0e, 0x1011, "WaveformSparkleContinuous" }, { 0x0f, 0, "PhysicalInputDevice" }, { 0x0f, 0x0001, "PhysicalInputDevice" }, { 0x0f, 0x0020, "Normal" }, { 0x0f, 0x0021, "SetEffectReport" }, { 0x0f, 0x0022, "EffectParameterBlockIndex" }, { 0x0f, 0x0023, "ParameterBlockOffset" }, { 0x0f, 0x0024, "ROMFlag" }, { 0x0f, 0x0025, "EffectType" }, { 0x0f, 0x0026, "ETConstantForce" }, { 0x0f, 0x0027, "ETRamp" }, { 0x0f, 0x0028, "ETCustomForce" }, { 0x0f, 0x0030, "ETSquare" }, { 0x0f, 0x0031, "ETSine" }, { 0x0f, 0x0032, "ETTriangle" }, { 0x0f, 0x0033, "ETSawtoothUp" }, { 0x0f, 0x0034, "ETSawtoothDown" }, { 0x0f, 0x0040, "ETSpring" }, { 0x0f, 0x0041, "ETDamper" }, { 0x0f, 0x0042, "ETInertia" }, { 0x0f, 0x0043, "ETFriction" }, { 0x0f, 0x0050, "Duration" }, { 0x0f, 0x0051, "SamplePeriod" }, { 0x0f, 0x0052, "Gain" }, { 0x0f, 0x0053, "TriggerButton" }, { 0x0f, 0x0054, "TriggerRepeatInterval" }, { 0x0f, 0x0055, "AxesEnable" }, { 0x0f, 0x0056, "DirectionEnable" }, { 0x0f, 0x0057, "Direction" }, { 0x0f, 0x0058, "TypeSpecificBlockOffset" }, { 0x0f, 0x0059, "BlockType" }, { 0x0f, 0x005a, "SetEnvelopeReport" }, { 0x0f, 0x005b, "AttackLevel" }, { 0x0f, 0x005c, "AttackTime" }, { 0x0f, 0x005d, "FadeLevel" }, { 0x0f, 0x005e, "FadeTime" }, { 0x0f, 0x005f, "SetConditionReport" }, { 0x0f, 0x0060, "CenterPointOffset" }, { 0x0f, 0x0061, "PositiveCoefficient" }, { 0x0f, 0x0062, "NegativeCoefficient" }, { 0x0f, 0x0063, "PositiveSaturation" }, { 0x0f, 0x0064, "NegativeSaturation" }, { 0x0f, 0x0065, "DeadBand" }, { 0x0f, 0x0066, "DownloadForceSample" }, { 0x0f, 0x0067, "IsochCustomForceEnable" }, { 0x0f, 0x0068, "CustomForceDataReport" }, { 0x0f, 0x0069, "CustomForceData" }, { 0x0f, 0x006a, "CustomForceVendorDefinedData" }, { 0x0f, 0x006b, "SetCustomForceReport" }, { 0x0f, 0x006c, "CustomForceDataOffset" }, { 0x0f, 0x006d, "SampleCount" }, { 0x0f, 0x006e, "SetPeriodicReport" }, { 0x0f, 0x006f, "Offset" }, { 0x0f, 0x0070, "Magnitude" }, { 0x0f, 0x0071, "Phase" }, { 0x0f, 0x0072, "Period" }, { 0x0f, 0x0073, "SetConstantForceReport" }, { 0x0f, 0x0074, "SetRampForceReport" }, { 0x0f, 0x0075, "RampStart" }, { 0x0f, 0x0076, "RampEnd" }, { 0x0f, 0x0077, "EffectOperationReport" }, { 0x0f, 0x0078, "EffectOperation" }, { 0x0f, 0x0079, "OpEffectStart" }, { 0x0f, 0x007a, "OpEffectStartSolo" }, { 0x0f, 0x007b, "OpEffectStop" }, { 0x0f, 0x007c, "LoopCount" }, { 0x0f, 0x007d, "DeviceGainReport" }, { 0x0f, 0x007e, "DeviceGain" }, { 0x0f, 0x007f, "ParameterBlockPoolsReport" }, { 0x0f, 0x0080, "RAMPoolSize" }, { 0x0f, 0x0081, "ROMPoolSize" }, { 0x0f, 0x0082, "ROMEffectBlockCount" }, { 0x0f, 0x0083, "SimultaneousEffectsMax" }, { 0x0f, 0x0084, "PoolAlignment" }, { 0x0f, 0x0085, "ParameterBlockMoveReport" }, { 0x0f, 0x0086, "MoveSource" }, { 0x0f, 0x0087, "MoveDestination" }, { 0x0f, 0x0088, "MoveLength" }, { 0x0f, 0x0089, "EffectParameterBlockLoadReport" }, { 0x0f, 0x008b, "EffectParameterBlockLoadStatus" }, { 0x0f, 0x008c, "BlockLoadSuccess" 
}, { 0x0f, 0x008d, "BlockLoadFull" }, { 0x0f, 0x008e, "BlockLoadError" }, { 0x0f, 0x008f, "BlockHandle" }, { 0x0f, 0x0090, "EffectParameterBlockFreeReport" }, { 0x0f, 0x0091, "TypeSpecificBlockHandle" }, { 0x0f, 0x0092, "PIDStateReport" }, { 0x0f, 0x0094, "EffectPlaying" }, { 0x0f, 0x0095, "PIDDeviceControlReport" }, { 0x0f, 0x0096, "PIDDeviceControl" }, { 0x0f, 0x0097, "DCEnableActuators" }, { 0x0f, 0x0098, "DCDisableActuators" }, { 0x0f, 0x0099, "DCStopAllEffects" }, { 0x0f, 0x009a, "DCReset" }, { 0x0f, 0x009b, "DCPause" }, { 0x0f, 0x009c, "DCContinue" }, { 0x0f, 0x009f, "DevicePaused" }, { 0x0f, 0x00a0, "ActuatorsEnabled" }, { 0x0f, 0x00a4, "SafetySwitch" }, { 0x0f, 0x00a5, "ActuatorOverrideSwitch" }, { 0x0f, 0x00a6, "ActuatorPower" }, { 0x0f, 0x00a7, "StartDelay" }, { 0x0f, 0x00a8, "ParameterBlockSize" }, { 0x0f, 0x00a9, "DeviceManagedPool" }, { 0x0f, 0x00aa, "SharedParameterBlocks" }, { 0x0f, 0x00ab, "CreateNewEffectParameterBlockReport" }, { 0x0f, 0x00ac, "RAMPoolAvailable" }, { 0x11, 0, "SoC" }, { 0x11, 0x0001, "SocControl" }, { 0x11, 0x0002, "FirmwareTransfer" }, { 0x11, 0x0003, "FirmwareFileId" }, { 0x11, 0x0004, "FileOffsetInBytes" }, { 0x11, 0x0005, "FileTransferSizeMaxInBytes" }, { 0x11, 0x0006, "FilePayload" }, { 0x11, 0x0007, "FilePayloadSizeInBytes" }, { 0x11, 0x0008, "FilePayloadContainsLastBytes" }, { 0x11, 0x0009, "FileTransferStop" }, { 0x11, 0x000a, "FileTransferTillEnd" }, { 0x12, 0, "EyeandHeadTrackers" }, { 0x12, 0x0001, "EyeTracker" }, { 0x12, 0x0002, "HeadTracker" }, { 0x12, 0x0010, "TrackingData" }, { 0x12, 0x0011, "Capabilities" }, { 0x12, 0x0012, "Configuration" }, { 0x12, 0x0013, "Status" }, { 0x12, 0x0014, "Control" }, { 0x12, 0x0020, "SensorTimestamp" }, { 0x12, 0x0021, "PositionX" }, { 0x12, 0x0022, "PositionY" }, { 0x12, 0x0023, "PositionZ" }, { 0x12, 0x0024, "GazePoint" }, { 0x12, 0x0025, "LeftEyePosition" }, { 0x12, 0x0026, "RightEyePosition" }, { 0x12, 0x0027, "HeadPosition" }, { 0x12, 0x0028, "HeadDirectionPoint" }, { 0x12, 0x0029, "RotationaboutXaxis" }, { 0x12, 0x002a, "RotationaboutYaxis" }, { 0x12, 0x002b, "RotationaboutZaxis" }, { 0x12, 0x0100, "TrackerQuality" }, { 0x12, 0x0101, "MinimumTrackingDistance" }, { 0x12, 0x0102, "OptimumTrackingDistance" }, { 0x12, 0x0103, "MaximumTrackingDistance" }, { 0x12, 0x0104, "MaximumScreenPlaneWidth" }, { 0x12, 0x0105, "MaximumScreenPlaneHeight" }, { 0x12, 0x0200, "DisplayManufacturerID" }, { 0x12, 0x0201, "DisplayProductID" }, { 0x12, 0x0202, "DisplaySerialNumber" }, { 0x12, 0x0203, "DisplayManufacturerDate" }, { 0x12, 0x0204, "CalibratedScreenWidth" }, { 0x12, 0x0205, "CalibratedScreenHeight" }, { 0x12, 0x0300, "SamplingFrequency" }, { 0x12, 0x0301, "ConfigurationStatus" }, { 0x12, 0x0400, "DeviceModeRequest" }, { 0x14, 0, "AuxiliaryDisplay" }, { 0x14, 0x0001, "AlphanumericDisplay" }, { 0x14, 0x0002, "AuxiliaryDisplay" }, { 0x14, 0x0020, "DisplayAttributesReport" }, { 0x14, 0x0021, "ASCIICharacterSet" }, { 0x14, 0x0022, "DataReadBack" }, { 0x14, 0x0023, "FontReadBack" }, { 0x14, 0x0024, "DisplayControlReport" }, { 0x14, 0x0025, "ClearDisplay" }, { 0x14, 0x0026, "DisplayEnable" }, { 0x14, 0x0027, "ScreenSaverDelay" }, { 0x14, 0x0028, "ScreenSaverEnable" }, { 0x14, 0x0029, "VerticalScroll" }, { 0x14, 0x002a, "HorizontalScroll" }, { 0x14, 0x002b, "CharacterReport" }, { 0x14, 0x002c, "DisplayData" }, { 0x14, 0x002d, "DisplayStatus" }, { 0x14, 0x002e, "StatNotReady" }, { 0x14, 0x002f, "StatReady" }, { 0x14, 0x0030, "ErrNotaloadablecharacter" }, { 0x14, 0x0031, "ErrFontdatacannotberead" }, { 0x14, 0x0032, 
"CursorPositionReport" }, { 0x14, 0x0033, "Row" }, { 0x14, 0x0034, "Column" }, { 0x14, 0x0035, "Rows" }, { 0x14, 0x0036, "Columns" }, { 0x14, 0x0037, "CursorPixelPositioning" }, { 0x14, 0x0038, "CursorMode" }, { 0x14, 0x0039, "CursorEnable" }, { 0x14, 0x003a, "CursorBlink" }, { 0x14, 0x003b, "FontReport" }, { 0x14, 0x003c, "FontData" }, { 0x14, 0x003d, "CharacterWidth" }, { 0x14, 0x003e, "CharacterHeight" }, { 0x14, 0x003f, "CharacterSpacingHorizontal" }, { 0x14, 0x0040, "CharacterSpacingVertical" }, { 0x14, 0x0041, "UnicodeCharacterSet" }, { 0x14, 0x0042, "Font7Segment" }, { 0x14, 0x0043, "7SegmentDirectMap" }, { 0x14, 0x0044, "Font14Segment" }, { 0x14, 0x0045, "14SegmentDirectMap" }, { 0x14, 0x0046, "DisplayBrightness" }, { 0x14, 0x0047, "DisplayContrast" }, { 0x14, 0x0048, "CharacterAttribute" }, { 0x14, 0x0049, "AttributeReadback" }, { 0x14, 0x004a, "AttributeData" }, { 0x14, 0x004b, "CharAttrEnhance" }, { 0x14, 0x004c, "CharAttrUnderline" }, { 0x14, 0x004d, "CharAttrBlink" }, { 0x14, 0x0080, "BitmapSizeX" }, { 0x14, 0x0081, "BitmapSizeY" }, { 0x14, 0x0082, "MaxBlitSize" }, { 0x14, 0x0083, "BitDepthFormat" }, { 0x14, 0x0084, "DisplayOrientation" }, { 0x14, 0x0085, "PaletteReport" }, { 0x14, 0x0086, "PaletteDataSize" }, { 0x14, 0x0087, "PaletteDataOffset" }, { 0x14, 0x0088, "PaletteData" }, { 0x14, 0x008a, "BlitReport" }, { 0x14, 0x008b, "BlitRectangleX1" }, { 0x14, 0x008c, "BlitRectangleY1" }, { 0x14, 0x008d, "BlitRectangleX2" }, { 0x14, 0x008e, "BlitRectangleY2" }, { 0x14, 0x008f, "BlitData" }, { 0x14, 0x0090, "SoftButton" }, { 0x14, 0x0091, "SoftButtonID" }, { 0x14, 0x0092, "SoftButtonSide" }, { 0x14, 0x0093, "SoftButtonOffset1" }, { 0x14, 0x0094, "SoftButtonOffset2" }, { 0x14, 0x0095, "SoftButtonReport" }, { 0x14, 0x00c2, "SoftKeys" }, { 0x14, 0x00cc, "DisplayDataExtensions" }, { 0x14, 0x00cf, "CharacterMapping" }, { 0x14, 0x00dd, "UnicodeEquivalent" }, { 0x14, 0x00df, "CharacterPageMapping" }, { 0x14, 0x00ff, "RequestReport" }, { 0x20, 0, "Sensors" }, { 0x20, 0x0001, "Sensor" }, { 0x20, 0x0010, "Biometric" }, { 0x20, 0x0011, "BiometricHumanPresence" }, { 0x20, 0x0012, "BiometricHumanProximity" }, { 0x20, 0x0013, "BiometricHumanTouch" }, { 0x20, 0x0014, "BiometricBloodPressure" }, { 0x20, 0x0015, "BiometricBodyTemperature" }, { 0x20, 0x0016, "BiometricHeartRate" }, { 0x20, 0x0017, "BiometricHeartRateVariability" }, { 0x20, 0x0018, "BiometricPeripheralOxygenSaturation" }, { 0x20, 0x0019, "BiometricRespiratoryRate" }, { 0x20, 0x0020, "Electrical" }, { 0x20, 0x0021, "ElectricalCapacitance" }, { 0x20, 0x0022, "ElectricalCurrent" }, { 0x20, 0x0023, "ElectricalPower" }, { 0x20, 0x0024, "ElectricalInductance" }, { 0x20, 0x0025, "ElectricalResistance" }, { 0x20, 0x0026, "ElectricalVoltage" }, { 0x20, 0x0027, "ElectricalPotentiometer" }, { 0x20, 0x0028, "ElectricalFrequency" }, { 0x20, 0x0029, "ElectricalPeriod" }, { 0x20, 0x0030, "Environmental" }, { 0x20, 0x0031, "EnvironmentalAtmosphericPressure" }, { 0x20, 0x0032, "EnvironmentalHumidity" }, { 0x20, 0x0033, "EnvironmentalTemperature" }, { 0x20, 0x0034, "EnvironmentalWindDirection" }, { 0x20, 0x0035, "EnvironmentalWindSpeed" }, { 0x20, 0x0036, "EnvironmentalAirQuality" }, { 0x20, 0x0037, "EnvironmentalHeatIndex" }, { 0x20, 0x0038, "EnvironmentalSurfaceTemperature" }, { 0x20, 0x0039, "EnvironmentalVolatileOrganicCompounds" }, { 0x20, 0x003a, "EnvironmentalObjectPresence" }, { 0x20, 0x003b, "EnvironmentalObjectProximity" }, { 0x20, 0x0040, "Light" }, { 0x20, 0x0041, "LightAmbientLight" }, { 0x20, 0x0042, "LightConsumerInfrared" }, { 0x20, 
0x0043, "LightInfraredLight" }, { 0x20, 0x0044, "LightVisibleLight" }, { 0x20, 0x0045, "LightUltravioletLight" }, { 0x20, 0x0050, "Location" }, { 0x20, 0x0051, "LocationBroadcast" }, { 0x20, 0x0052, "LocationDeadReckoning" }, { 0x20, 0x0053, "LocationGPSGlobalPositioningSystem" }, { 0x20, 0x0054, "LocationLookup" }, { 0x20, 0x0055, "LocationOther" }, { 0x20, 0x0056, "LocationStatic" }, { 0x20, 0x0057, "LocationTriangulation" }, { 0x20, 0x0060, "Mechanical" }, { 0x20, 0x0061, "MechanicalBooleanSwitch" }, { 0x20, 0x0062, "MechanicalBooleanSwitchArray" }, { 0x20, 0x0063, "MechanicalMultivalueSwitch" }, { 0x20, 0x0064, "MechanicalForce" }, { 0x20, 0x0065, "MechanicalPressure" }, { 0x20, 0x0066, "MechanicalStrain" }, { 0x20, 0x0067, "MechanicalWeight" }, { 0x20, 0x0068, "MechanicalHapticVibrator" }, { 0x20, 0x0069, "MechanicalHallEffectSwitch" }, { 0x20, 0x0070, "Motion" }, { 0x20, 0x0071, "MotionAccelerometer1D" }, { 0x20, 0x0072, "MotionAccelerometer2D" }, { 0x20, 0x0073, "MotionAccelerometer3D" }, { 0x20, 0x0074, "MotionGyrometer1D" }, { 0x20, 0x0075, "MotionGyrometer2D" }, { 0x20, 0x0076, "MotionGyrometer3D" }, { 0x20, 0x0077, "MotionMotionDetector" }, { 0x20, 0x0078, "MotionSpeedometer" }, { 0x20, 0x0079, "MotionAccelerometer" }, { 0x20, 0x007a, "MotionGyrometer" }, { 0x20, 0x007b, "MotionGravityVector" }, { 0x20, 0x007c, "MotionLinearAccelerometer" }, { 0x20, 0x0080, "Orientation" }, { 0x20, 0x0081, "OrientationCompass1D" }, { 0x20, 0x0082, "OrientationCompass2D" }, { 0x20, 0x0083, "OrientationCompass3D" }, { 0x20, 0x0084, "OrientationInclinometer1D" }, { 0x20, 0x0085, "OrientationInclinometer2D" }, { 0x20, 0x0086, "OrientationInclinometer3D" }, { 0x20, 0x0087, "OrientationDistance1D" }, { 0x20, 0x0088, "OrientationDistance2D" }, { 0x20, 0x0089, "OrientationDistance3D" }, { 0x20, 0x008a, "OrientationDeviceOrientation" }, { 0x20, 0x008b, "OrientationCompass" }, { 0x20, 0x008c, "OrientationInclinometer" }, { 0x20, 0x008d, "OrientationDistance" }, { 0x20, 0x008e, "OrientationRelativeOrientation" }, { 0x20, 0x008f, "OrientationSimpleOrientation" }, { 0x20, 0x0090, "Scanner" }, { 0x20, 0x0091, "ScannerBarcode" }, { 0x20, 0x0092, "ScannerRFID" }, { 0x20, 0x0093, "ScannerNFC" }, { 0x20, 0x00a0, "Time" }, { 0x20, 0x00a1, "TimeAlarmTimer" }, { 0x20, 0x00a2, "TimeRealTimeClock" }, { 0x20, 0x00b0, "PersonalActivity" }, { 0x20, 0x00b1, "PersonalActivityActivityDetection" }, { 0x20, 0x00b2, "PersonalActivityDevicePosition" }, { 0x20, 0x00b3, "PersonalActivityFloorTracker" }, { 0x20, 0x00b4, "PersonalActivityPedometer" }, { 0x20, 0x00b5, "PersonalActivityStepDetection" }, { 0x20, 0x00c0, "OrientationExtended" }, { 0x20, 0x00c1, "OrientationExtendedGeomagneticOrientation" }, { 0x20, 0x00c2, "OrientationExtendedMagnetometer" }, { 0x20, 0x00d0, "Gesture" }, { 0x20, 0x00d1, "GestureChassisFlipGesture" }, { 0x20, 0x00d2, "GestureHingeFoldGesture" }, { 0x20, 0x00e0, "Other" }, { 0x20, 0x00e1, "OtherCustom" }, { 0x20, 0x00e2, "OtherGeneric" }, { 0x20, 0x00e3, "OtherGenericEnumerator" }, { 0x20, 0x00e4, "OtherHingeAngle" }, { 0x20, 0x00f0, "VendorReserved1" }, { 0x20, 0x00f1, "VendorReserved2" }, { 0x20, 0x00f2, "VendorReserved3" }, { 0x20, 0x00f3, "VendorReserved4" }, { 0x20, 0x00f4, "VendorReserved5" }, { 0x20, 0x00f5, "VendorReserved6" }, { 0x20, 0x00f6, "VendorReserved7" }, { 0x20, 0x00f7, "VendorReserved8" }, { 0x20, 0x00f8, "VendorReserved9" }, { 0x20, 0x00f9, "VendorReserved10" }, { 0x20, 0x00fa, "VendorReserved11" }, { 0x20, 0x00fb, "VendorReserved12" }, { 0x20, 0x00fc, "VendorReserved13" }, { 0x20, 
0x00fd, "VendorReserved14" }, { 0x20, 0x00fe, "VendorReserved15" }, { 0x20, 0x00ff, "VendorReserved16" }, { 0x20, 0x0200, "Event" }, { 0x20, 0x0201, "EventSensorState" }, { 0x20, 0x0202, "EventSensorEvent" }, { 0x20, 0x0300, "Property" }, { 0x20, 0x0301, "PropertyFriendlyName" }, { 0x20, 0x0302, "PropertyPersistentUniqueID" }, { 0x20, 0x0303, "PropertySensorStatus" }, { 0x20, 0x0304, "PropertyMinimumReportInterval" }, { 0x20, 0x0305, "PropertySensorManufacturer" }, { 0x20, 0x0306, "PropertySensorModel" }, { 0x20, 0x0307, "PropertySensorSerialNumber" }, { 0x20, 0x0308, "PropertySensorDescription" }, { 0x20, 0x0309, "PropertySensorConnectionType" }, { 0x20, 0x030a, "PropertySensorDevicePath" }, { 0x20, 0x030b, "PropertyHardwareRevision" }, { 0x20, 0x030c, "PropertyFirmwareVersion" }, { 0x20, 0x030d, "PropertyReleaseDate" }, { 0x20, 0x030e, "PropertyReportInterval" }, { 0x20, 0x030f, "PropertyChangeSensitivityAbsolute" }, { 0x20, 0x0310, "PropertyChangeSensitivityPercentofRange" }, { 0x20, 0x0311, "PropertyChangeSensitivityPercentRelative" }, { 0x20, 0x0312, "PropertyAccuracy" }, { 0x20, 0x0313, "PropertyResolution" }, { 0x20, 0x0314, "PropertyMaximum" }, { 0x20, 0x0315, "PropertyMinimum" }, { 0x20, 0x0316, "PropertyReportingState" }, { 0x20, 0x0317, "PropertySamplingRate" }, { 0x20, 0x0318, "PropertyResponseCurve" }, { 0x20, 0x0319, "PropertyPowerState" }, { 0x20, 0x031a, "PropertyMaximumFIFOEvents" }, { 0x20, 0x031b, "PropertyReportLatency" }, { 0x20, 0x031c, "PropertyFlushFIFOEvents" }, { 0x20, 0x031d, "PropertyMaximumPowerConsumption" }, { 0x20, 0x031e, "PropertyIsPrimary" }, { 0x20, 0x031f, "PropertyHumanPresenceDetectionType" }, { 0x20, 0x0400, "DataFieldLocation" }, { 0x20, 0x0402, "DataFieldAltitudeAntennaSeaLevel" }, { 0x20, 0x0403, "DataFieldDifferentialReferenceStationID" }, { 0x20, 0x0404, "DataFieldAltitudeEllipsoidError" }, { 0x20, 0x0405, "DataFieldAltitudeEllipsoid" }, { 0x20, 0x0406, "DataFieldAltitudeSeaLevelError" }, { 0x20, 0x0407, "DataFieldAltitudeSeaLevel" }, { 0x20, 0x0408, "DataFieldDifferentialGPSDataAge" }, { 0x20, 0x0409, "DataFieldErrorRadius" }, { 0x20, 0x040a, "DataFieldFixQuality" }, { 0x20, 0x040b, "DataFieldFixType" }, { 0x20, 0x040c, "DataFieldGeoidalSeparation" }, { 0x20, 0x040d, "DataFieldGPSOperationMode" }, { 0x20, 0x040e, "DataFieldGPSSelectionMode" }, { 0x20, 0x040f, "DataFieldGPSStatus" }, { 0x20, 0x0410, "DataFieldPositionDilutionofPrecision" }, { 0x20, 0x0411, "DataFieldHorizontalDilutionofPrecision" }, { 0x20, 0x0412, "DataFieldVerticalDilutionofPrecision" }, { 0x20, 0x0413, "DataFieldLatitude" }, { 0x20, 0x0414, "DataFieldLongitude" }, { 0x20, 0x0415, "DataFieldTrueHeading" }, { 0x20, 0x0416, "DataFieldMagneticHeading" }, { 0x20, 0x0417, "DataFieldMagneticVariation" }, { 0x20, 0x0418, "DataFieldSpeed" }, { 0x20, 0x0419, "DataFieldSatellitesinView" }, { 0x20, 0x041a, "DataFieldSatellitesinViewAzimuth" }, { 0x20, 0x041b, "DataFieldSatellitesinViewElevation" }, { 0x20, 0x041c, "DataFieldSatellitesinViewIDs" }, { 0x20, 0x041d, "DataFieldSatellitesinViewPRNs" }, { 0x20, 0x041e, "DataFieldSatellitesinViewSNRatios" }, { 0x20, 0x041f, "DataFieldSatellitesUsedCount" }, { 0x20, 0x0420, "DataFieldSatellitesUsedPRNs" }, { 0x20, 0x0421, "DataFieldNMEASentence" }, { 0x20, 0x0422, "DataFieldAddressLine1" }, { 0x20, 0x0423, "DataFieldAddressLine2" }, { 0x20, 0x0424, "DataFieldCity" }, { 0x20, 0x0425, "DataFieldStateorProvince" }, { 0x20, 0x0426, "DataFieldCountryorRegion" }, { 0x20, 0x0427, "DataFieldPostalCode" }, { 0x20, 0x042a, "PropertyLocation" }, { 0x20, 
0x042b, "PropertyLocationDesiredAccuracy" }, { 0x20, 0x0430, "DataFieldEnvironmental" }, { 0x20, 0x0431, "DataFieldAtmosphericPressure" }, { 0x20, 0x0433, "DataFieldRelativeHumidity" }, { 0x20, 0x0434, "DataFieldTemperature" }, { 0x20, 0x0435, "DataFieldWindDirection" }, { 0x20, 0x0436, "DataFieldWindSpeed" }, { 0x20, 0x0437, "DataFieldAirQualityIndex" }, { 0x20, 0x0438, "DataFieldEquivalentCO2" }, { 0x20, 0x0439, "DataFieldVolatileOrganicCompoundConcentration" }, { 0x20, 0x043a, "DataFieldObjectPresence" }, { 0x20, 0x043b, "DataFieldObjectProximityRange" }, { 0x20, 0x043c, "DataFieldObjectProximityOutofRange" }, { 0x20, 0x0440, "PropertyEnvironmental" }, { 0x20, 0x0441, "PropertyReferencePressure" }, { 0x20, 0x0450, "DataFieldMotion" }, { 0x20, 0x0451, "DataFieldMotionState" }, { 0x20, 0x0452, "DataFieldAcceleration" }, { 0x20, 0x0453, "DataFieldAccelerationAxisX" }, { 0x20, 0x0454, "DataFieldAccelerationAxisY" }, { 0x20, 0x0455, "DataFieldAccelerationAxisZ" }, { 0x20, 0x0456, "DataFieldAngularVelocity" }, { 0x20, 0x0457, "DataFieldAngularVelocityaboutXAxis" }, { 0x20, 0x0458, "DataFieldAngularVelocityaboutYAxis" }, { 0x20, 0x0459, "DataFieldAngularVelocityaboutZAxis" }, { 0x20, 0x045a, "DataFieldAngularPosition" }, { 0x20, 0x045b, "DataFieldAngularPositionaboutXAxis" }, { 0x20, 0x045c, "DataFieldAngularPositionaboutYAxis" }, { 0x20, 0x045d, "DataFieldAngularPositionaboutZAxis" }, { 0x20, 0x045e, "DataFieldMotionSpeed" }, { 0x20, 0x045f, "DataFieldMotionIntensity" }, { 0x20, 0x0470, "DataFieldOrientation" }, { 0x20, 0x0471, "DataFieldHeading" }, { 0x20, 0x0472, "DataFieldHeadingXAxis" }, { 0x20, 0x0473, "DataFieldHeadingYAxis" }, { 0x20, 0x0474, "DataFieldHeadingZAxis" }, { 0x20, 0x0475, "DataFieldHeadingCompensatedMagneticNorth" }, { 0x20, 0x0476, "DataFieldHeadingCompensatedTrueNorth" }, { 0x20, 0x0477, "DataFieldHeadingMagneticNorth" }, { 0x20, 0x0478, "DataFieldHeadingTrueNorth" }, { 0x20, 0x0479, "DataFieldDistance" }, { 0x20, 0x047a, "DataFieldDistanceXAxis" }, { 0x20, 0x047b, "DataFieldDistanceYAxis" }, { 0x20, 0x047c, "DataFieldDistanceZAxis" }, { 0x20, 0x047d, "DataFieldDistanceOutofRange" }, { 0x20, 0x047e, "DataFieldTilt" }, { 0x20, 0x047f, "DataFieldTiltXAxis" }, { 0x20, 0x0480, "DataFieldTiltYAxis" }, { 0x20, 0x0481, "DataFieldTiltZAxis" }, { 0x20, 0x0482, "DataFieldRotationMatrix" }, { 0x20, 0x0483, "DataFieldQuaternion" }, { 0x20, 0x0484, "DataFieldMagneticFlux" }, { 0x20, 0x0485, "DataFieldMagneticFluxXAxis" }, { 0x20, 0x0486, "DataFieldMagneticFluxYAxis" }, { 0x20, 0x0487, "DataFieldMagneticFluxZAxis" }, { 0x20, 0x0488, "DataFieldMagnetometerAccuracy" }, { 0x20, 0x0489, "DataFieldSimpleOrientationDirection" }, { 0x20, 0x0490, "DataFieldMechanical" }, { 0x20, 0x0491, "DataFieldBooleanSwitchState" }, { 0x20, 0x0492, "DataFieldBooleanSwitchArrayStates" }, { 0x20, 0x0493, "DataFieldMultivalueSwitchValue" }, { 0x20, 0x0494, "DataFieldForce" }, { 0x20, 0x0495, "DataFieldAbsolutePressure" }, { 0x20, 0x0496, "DataFieldGaugePressure" }, { 0x20, 0x0497, "DataFieldStrain" }, { 0x20, 0x0498, "DataFieldWeight" }, { 0x20, 0x04a0, "PropertyMechanical" }, { 0x20, 0x04a1, "PropertyVibrationState" }, { 0x20, 0x04a2, "PropertyForwardVibrationSpeed" }, { 0x20, 0x04a3, "PropertyBackwardVibrationSpeed" }, { 0x20, 0x04b0, "DataFieldBiometric" }, { 0x20, 0x04b1, "DataFieldHumanPresence" }, { 0x20, 0x04b2, "DataFieldHumanProximityRange" }, { 0x20, 0x04b3, "DataFieldHumanProximityOutofRange" }, { 0x20, 0x04b4, "DataFieldHumanTouchState" }, { 0x20, 0x04b5, "DataFieldBloodPressure" }, { 0x20, 
0x04b6, "DataFieldBloodPressureDiastolic" }, { 0x20, 0x04b7, "DataFieldBloodPressureSystolic" }, { 0x20, 0x04b8, "DataFieldHeartRate" }, { 0x20, 0x04b9, "DataFieldRestingHeartRate" }, { 0x20, 0x04ba, "DataFieldHeartbeatInterval" }, { 0x20, 0x04bb, "DataFieldRespiratoryRate" }, { 0x20, 0x04bc, "DataFieldSpO2" }, { 0x20, 0x04bd, "DataFieldHumanAttentionDetected" }, { 0x20, 0x04be, "DataFieldHumanHeadAzimuth" }, { 0x20, 0x04bf, "DataFieldHumanHeadAltitude" }, { 0x20, 0x04c0, "DataFieldHumanHeadRoll" }, { 0x20, 0x04c1, "DataFieldHumanHeadPitch" }, { 0x20, 0x04c2, "DataFieldHumanHeadYaw" }, { 0x20, 0x04c3, "DataFieldHumanCorrelationId" }, { 0x20, 0x04d0, "DataFieldLight" }, { 0x20, 0x04d1, "DataFieldIlluminance" }, { 0x20, 0x04d2, "DataFieldColorTemperature" }, { 0x20, 0x04d3, "DataFieldChromaticity" }, { 0x20, 0x04d4, "DataFieldChromaticityX" }, { 0x20, 0x04d5, "DataFieldChromaticityY" }, { 0x20, 0x04d6, "DataFieldConsumerIRSentenceReceive" }, { 0x20, 0x04d7, "DataFieldInfraredLight" }, { 0x20, 0x04d8, "DataFieldRedLight" }, { 0x20, 0x04d9, "DataFieldGreenLight" }, { 0x20, 0x04da, "DataFieldBlueLight" }, { 0x20, 0x04db, "DataFieldUltravioletALight" }, { 0x20, 0x04dc, "DataFieldUltravioletBLight" }, { 0x20, 0x04dd, "DataFieldUltravioletIndex" }, { 0x20, 0x04de, "DataFieldNearInfraredLight" }, { 0x20, 0x04df, "PropertyLight" }, { 0x20, 0x04e0, "PropertyConsumerIRSentenceSend" }, { 0x20, 0x04e2, "PropertyAutoBrightnessPreferred" }, { 0x20, 0x04e3, "PropertyAutoColorPreferred" }, { 0x20, 0x04f0, "DataFieldScanner" }, { 0x20, 0x04f1, "DataFieldRFIDTag40Bit" }, { 0x20, 0x04f2, "DataFieldNFCSentenceReceive" }, { 0x20, 0x04f8, "PropertyScanner" }, { 0x20, 0x04f9, "PropertyNFCSentenceSend" }, { 0x20, 0x0500, "DataFieldElectrical" }, { 0x20, 0x0501, "DataFieldCapacitance" }, { 0x20, 0x0502, "DataFieldCurrent" }, { 0x20, 0x0503, "DataFieldElectricalPower" }, { 0x20, 0x0504, "DataFieldInductance" }, { 0x20, 0x0505, "DataFieldResistance" }, { 0x20, 0x0506, "DataFieldVoltage" }, { 0x20, 0x0507, "DataFieldFrequency" }, { 0x20, 0x0508, "DataFieldPeriod" }, { 0x20, 0x0509, "DataFieldPercentofRange" }, { 0x20, 0x0520, "DataFieldTime" }, { 0x20, 0x0521, "DataFieldYear" }, { 0x20, 0x0522, "DataFieldMonth" }, { 0x20, 0x0523, "DataFieldDay" }, { 0x20, 0x0524, "DataFieldDayofWeek" }, { 0x20, 0x0525, "DataFieldHour" }, { 0x20, 0x0526, "DataFieldMinute" }, { 0x20, 0x0527, "DataFieldSecond" }, { 0x20, 0x0528, "DataFieldMillisecond" }, { 0x20, 0x0529, "DataFieldTimestamp" }, { 0x20, 0x052a, "DataFieldJulianDayofYear" }, { 0x20, 0x052b, "DataFieldTimeSinceSystemBoot" }, { 0x20, 0x0530, "PropertyTime" }, { 0x20, 0x0531, "PropertyTimeZoneOffsetfromUTC" }, { 0x20, 0x0532, "PropertyTimeZoneName" }, { 0x20, 0x0533, "PropertyDaylightSavingsTimeObserved" }, { 0x20, 0x0534, "PropertyTimeTrimAdjustment" }, { 0x20, 0x0535, "PropertyArmAlarm" }, { 0x20, 0x0540, "DataFieldCustom" }, { 0x20, 0x0541, "DataFieldCustomUsage" }, { 0x20, 0x0542, "DataFieldCustomBooleanArray" }, { 0x20, 0x0543, "DataFieldCustomValue" }, { 0x20, 0x0544, "DataFieldCustomValue1" }, { 0x20, 0x0545, "DataFieldCustomValue2" }, { 0x20, 0x0546, "DataFieldCustomValue3" }, { 0x20, 0x0547, "DataFieldCustomValue4" }, { 0x20, 0x0548, "DataFieldCustomValue5" }, { 0x20, 0x0549, "DataFieldCustomValue6" }, { 0x20, 0x054a, "DataFieldCustomValue7" }, { 0x20, 0x054b, "DataFieldCustomValue8" }, { 0x20, 0x054c, "DataFieldCustomValue9" }, { 0x20, 0x054d, "DataFieldCustomValue10" }, { 0x20, 0x054e, "DataFieldCustomValue11" }, { 0x20, 0x054f, "DataFieldCustomValue12" }, { 0x20, 
0x0550, "DataFieldCustomValue13" }, { 0x20, 0x0551, "DataFieldCustomValue14" }, { 0x20, 0x0552, "DataFieldCustomValue15" }, { 0x20, 0x0553, "DataFieldCustomValue16" }, { 0x20, 0x0554, "DataFieldCustomValue17" }, { 0x20, 0x0555, "DataFieldCustomValue18" }, { 0x20, 0x0556, "DataFieldCustomValue19" }, { 0x20, 0x0557, "DataFieldCustomValue20" }, { 0x20, 0x0558, "DataFieldCustomValue21" }, { 0x20, 0x0559, "DataFieldCustomValue22" }, { 0x20, 0x055a, "DataFieldCustomValue23" }, { 0x20, 0x055b, "DataFieldCustomValue24" }, { 0x20, 0x055c, "DataFieldCustomValue25" }, { 0x20, 0x055d, "DataFieldCustomValue26" }, { 0x20, 0x055e, "DataFieldCustomValue27" }, { 0x20, 0x055f, "DataFieldCustomValue28" }, { 0x20, 0x0560, "DataFieldGeneric" }, { 0x20, 0x0561, "DataFieldGenericGUIDorPROPERTYKEY" }, { 0x20, 0x0562, "DataFieldGenericCategoryGUID" }, { 0x20, 0x0563, "DataFieldGenericTypeGUID" }, { 0x20, 0x0564, "DataFieldGenericEventPROPERTYKEY" }, { 0x20, 0x0565, "DataFieldGenericPropertyPROPERTYKEY" }, { 0x20, 0x0566, "DataFieldGenericDataFieldPROPERTYKEY" }, { 0x20, 0x0567, "DataFieldGenericEvent" }, { 0x20, 0x0568, "DataFieldGenericProperty" }, { 0x20, 0x0569, "DataFieldGenericDataField" }, { 0x20, 0x056a, "DataFieldEnumeratorTableRowIndex" }, { 0x20, 0x056b, "DataFieldEnumeratorTableRowCount" }, { 0x20, 0x056c, "DataFieldGenericGUIDorPROPERTYKEYkind" }, { 0x20, 0x056d, "DataFieldGenericGUID" }, { 0x20, 0x056e, "DataFieldGenericPROPERTYKEY" }, { 0x20, 0x056f, "DataFieldGenericTopLevelCollectionID" }, { 0x20, 0x0570, "DataFieldGenericReportID" }, { 0x20, 0x0571, "DataFieldGenericReportItemPositionIndex" }, { 0x20, 0x0572, "DataFieldGenericFirmwareVARTYPE" }, { 0x20, 0x0573, "DataFieldGenericUnitofMeasure" }, { 0x20, 0x0574, "DataFieldGenericUnitExponent" }, { 0x20, 0x0575, "DataFieldGenericReportSize" }, { 0x20, 0x0576, "DataFieldGenericReportCount" }, { 0x20, 0x0580, "PropertyGeneric" }, { 0x20, 0x0581, "PropertyEnumeratorTableRowIndex" }, { 0x20, 0x0582, "PropertyEnumeratorTableRowCount" }, { 0x20, 0x0590, "DataFieldPersonalActivity" }, { 0x20, 0x0591, "DataFieldActivityType" }, { 0x20, 0x0592, "DataFieldActivityState" }, { 0x20, 0x0593, "DataFieldDevicePosition" }, { 0x20, 0x0594, "DataFieldStepCount" }, { 0x20, 0x0595, "DataFieldStepCountReset" }, { 0x20, 0x0596, "DataFieldStepDuration" }, { 0x20, 0x0597, "DataFieldStepType" }, { 0x20, 0x05a0, "PropertyMinimumActivityDetectionInterval" }, { 0x20, 0x05a1, "PropertySupportedActivityTypes" }, { 0x20, 0x05a2, "PropertySubscribedActivityTypes" }, { 0x20, 0x05a3, "PropertySupportedStepTypes" }, { 0x20, 0x05a4, "PropertySubscribedStepTypes" }, { 0x20, 0x05a5, "PropertyFloorHeight" }, { 0x20, 0x05b0, "DataFieldCustomTypeID" }, { 0x20, 0x05c0, "PropertyCustom" }, { 0x20, 0x05c1, "PropertyCustomValue1" }, { 0x20, 0x05c2, "PropertyCustomValue2" }, { 0x20, 0x05c3, "PropertyCustomValue3" }, { 0x20, 0x05c4, "PropertyCustomValue4" }, { 0x20, 0x05c5, "PropertyCustomValue5" }, { 0x20, 0x05c6, "PropertyCustomValue6" }, { 0x20, 0x05c7, "PropertyCustomValue7" }, { 0x20, 0x05c8, "PropertyCustomValue8" }, { 0x20, 0x05c9, "PropertyCustomValue9" }, { 0x20, 0x05ca, "PropertyCustomValue10" }, { 0x20, 0x05cb, "PropertyCustomValue11" }, { 0x20, 0x05cc, "PropertyCustomValue12" }, { 0x20, 0x05cd, "PropertyCustomValue13" }, { 0x20, 0x05ce, "PropertyCustomValue14" }, { 0x20, 0x05cf, "PropertyCustomValue15" }, { 0x20, 0x05d0, "PropertyCustomValue16" }, { 0x20, 0x05e0, "DataFieldHinge" }, { 0x20, 0x05e1, "DataFieldHingeAngle" }, { 0x20, 0x05f0, "DataFieldGestureSensor" }, { 0x20, 0x05f1, 
"DataFieldGestureState" }, { 0x20, 0x05f2, "DataFieldHingeFoldInitialAngle" }, { 0x20, 0x05f3, "DataFieldHingeFoldFinalAngle" }, { 0x20, 0x05f4, "DataFieldHingeFoldContributingPanel" }, { 0x20, 0x05f5, "DataFieldHingeFoldType" }, { 0x20, 0x0800, "SensorStateUndefined" }, { 0x20, 0x0801, "SensorStateReady" }, { 0x20, 0x0802, "SensorStateNotAvailable" }, { 0x20, 0x0803, "SensorStateNoData" }, { 0x20, 0x0804, "SensorStateInitializing" }, { 0x20, 0x0805, "SensorStateAccessDenied" }, { 0x20, 0x0806, "SensorStateError" }, { 0x20, 0x0810, "SensorEventUnknown" }, { 0x20, 0x0811, "SensorEventStateChanged" }, { 0x20, 0x0812, "SensorEventPropertyChanged" }, { 0x20, 0x0813, "SensorEventDataUpdated" }, { 0x20, 0x0814, "SensorEventPollResponse" }, { 0x20, 0x0815, "SensorEventChangeSensitivity" }, { 0x20, 0x0816, "SensorEventRangeMaximumReached" }, { 0x20, 0x0817, "SensorEventRangeMinimumReached" }, { 0x20, 0x0818, "SensorEventHighThresholdCrossUpward" }, { 0x20, 0x0819, "SensorEventHighThresholdCrossDownward" }, { 0x20, 0x081a, "SensorEventLowThresholdCrossUpward" }, { 0x20, 0x081b, "SensorEventLowThresholdCrossDownward" }, { 0x20, 0x081c, "SensorEventZeroThresholdCrossUpward" }, { 0x20, 0x081d, "SensorEventZeroThresholdCrossDownward" }, { 0x20, 0x081e, "SensorEventPeriodExceeded" }, { 0x20, 0x081f, "SensorEventFrequencyExceeded" }, { 0x20, 0x0820, "SensorEventComplexTrigger" }, { 0x20, 0x0830, "ConnectionTypePCIntegrated" }, { 0x20, 0x0831, "ConnectionTypePCAttached" }, { 0x20, 0x0832, "ConnectionTypePCExternal" }, { 0x20, 0x0840, "ReportingStateReportNoEvents" }, { 0x20, 0x0841, "ReportingStateReportAllEvents" }, { 0x20, 0x0842, "ReportingStateReportThresholdEvents" }, { 0x20, 0x0843, "ReportingStateWakeOnNoEvents" }, { 0x20, 0x0844, "ReportingStateWakeOnAllEvents" }, { 0x20, 0x0845, "ReportingStateWakeOnThresholdEvents" }, { 0x20, 0x0846, "ReportingStateAnytime" }, { 0x20, 0x0850, "PowerStateUndefined" }, { 0x20, 0x0851, "PowerStateD0FullPower" }, { 0x20, 0x0852, "PowerStateD1LowPower" }, { 0x20, 0x0853, "PowerStateD2StandbyPowerwithWakeup" }, { 0x20, 0x0854, "PowerStateD3SleepwithWakeup" }, { 0x20, 0x0855, "PowerStateD4PowerOff" }, { 0x20, 0x0860, "AccuracyDefault" }, { 0x20, 0x0861, "AccuracyHigh" }, { 0x20, 0x0862, "AccuracyMedium" }, { 0x20, 0x0863, "AccuracyLow" }, { 0x20, 0x0870, "FixQualityNoFix" }, { 0x20, 0x0871, "FixQualityGPS" }, { 0x20, 0x0872, "FixQualityDGPS" }, { 0x20, 0x0880, "FixTypeNoFix" }, { 0x20, 0x0881, "FixTypeGPSSPSModeFixValid" }, { 0x20, 0x0882, "FixTypeDGPSSPSModeFixValid" }, { 0x20, 0x0883, "FixTypeGPSPPSModeFixValid" }, { 0x20, 0x0884, "FixTypeRealTimeKinematic" }, { 0x20, 0x0885, "FixTypeFloatRTK" }, { 0x20, 0x0886, "FixTypeEstimateddeadreckoned" }, { 0x20, 0x0887, "FixTypeManualInputMode" }, { 0x20, 0x0888, "FixTypeSimulatorMode" }, { 0x20, 0x0890, "GPSOperationModeManual" }, { 0x20, 0x0891, "GPSOperationModeAutomatic" }, { 0x20, 0x08a0, "GPSSelectionModeAutonomous" }, { 0x20, 0x08a1, "GPSSelectionModeDGPS" }, { 0x20, 0x08a2, "GPSSelectionModeEstimateddeadreckoned" }, { 0x20, 0x08a3, "GPSSelectionModeManualInput" }, { 0x20, 0x08a4, "GPSSelectionModeSimulator" }, { 0x20, 0x08a5, "GPSSelectionModeDataNotValid" }, { 0x20, 0x08b0, "GPSStatusDataValid" }, { 0x20, 0x08b1, "GPSStatusDataNotValid" }, { 0x20, 0x08c0, "DayofWeekSunday" }, { 0x20, 0x08c1, "DayofWeekMonday" }, { 0x20, 0x08c2, "DayofWeekTuesday" }, { 0x20, 0x08c3, "DayofWeekWednesday" }, { 0x20, 0x08c4, "DayofWeekThursday" }, { 0x20, 0x08c5, "DayofWeekFriday" }, { 0x20, 0x08c6, "DayofWeekSaturday" }, { 0x20, 0x08d0, 
"KindCategory" }, { 0x20, 0x08d1, "KindType" }, { 0x20, 0x08d2, "KindEvent" }, { 0x20, 0x08d3, "KindProperty" }, { 0x20, 0x08d4, "KindDataField" }, { 0x20, 0x08e0, "MagnetometerAccuracyLow" }, { 0x20, 0x08e1, "MagnetometerAccuracyMedium" }, { 0x20, 0x08e2, "MagnetometerAccuracyHigh" }, { 0x20, 0x08f0, "SimpleOrientationDirectionNotRotated" }, { 0x20, 0x08f1, "SimpleOrientationDirectionRotated90DegreesCCW" }, { 0x20, 0x08f2, "SimpleOrientationDirectionRotated180DegreesCCW" }, { 0x20, 0x08f3, "SimpleOrientationDirectionRotated270DegreesCCW" }, { 0x20, 0x08f4, "SimpleOrientationDirectionFaceUp" }, { 0x20, 0x08f5, "SimpleOrientationDirectionFaceDown" }, { 0x20, 0x0900, "VT_NULL" }, { 0x20, 0x0901, "VT_BOOL" }, { 0x20, 0x0902, "VT_UI1" }, { 0x20, 0x0903, "VT_I1" }, { 0x20, 0x0904, "VT_UI2" }, { 0x20, 0x0905, "VT_I2" }, { 0x20, 0x0906, "VT_UI4" }, { 0x20, 0x0907, "VT_I4" }, { 0x20, 0x0908, "VT_UI8" }, { 0x20, 0x0909, "VT_I8" }, { 0x20, 0x090a, "VT_R4" }, { 0x20, 0x090b, "VT_R8" }, { 0x20, 0x090c, "VT_WSTR" }, { 0x20, 0x090d, "VT_STR" }, { 0x20, 0x090e, "VT_CLSID" }, { 0x20, 0x090f, "VT_VECTORVT_UI1" }, { 0x20, 0x0910, "VT_F16E0" }, { 0x20, 0x0911, "VT_F16E1" }, { 0x20, 0x0912, "VT_F16E2" }, { 0x20, 0x0913, "VT_F16E3" }, { 0x20, 0x0914, "VT_F16E4" }, { 0x20, 0x0915, "VT_F16E5" }, { 0x20, 0x0916, "VT_F16E6" }, { 0x20, 0x0917, "VT_F16E7" }, { 0x20, 0x0918, "VT_F16E8" }, { 0x20, 0x0919, "VT_F16E9" }, { 0x20, 0x091a, "VT_F16EA" }, { 0x20, 0x091b, "VT_F16EB" }, { 0x20, 0x091c, "VT_F16EC" }, { 0x20, 0x091d, "VT_F16ED" }, { 0x20, 0x091e, "VT_F16EE" }, { 0x20, 0x091f, "VT_F16EF" }, { 0x20, 0x0920, "VT_F32E0" }, { 0x20, 0x0921, "VT_F32E1" }, { 0x20, 0x0922, "VT_F32E2" }, { 0x20, 0x0923, "VT_F32E3" }, { 0x20, 0x0924, "VT_F32E4" }, { 0x20, 0x0925, "VT_F32E5" }, { 0x20, 0x0926, "VT_F32E6" }, { 0x20, 0x0927, "VT_F32E7" }, { 0x20, 0x0928, "VT_F32E8" }, { 0x20, 0x0929, "VT_F32E9" }, { 0x20, 0x092a, "VT_F32EA" }, { 0x20, 0x092b, "VT_F32EB" }, { 0x20, 0x092c, "VT_F32EC" }, { 0x20, 0x092d, "VT_F32ED" }, { 0x20, 0x092e, "VT_F32EE" }, { 0x20, 0x092f, "VT_F32EF" }, { 0x20, 0x0930, "ActivityTypeUnknown" }, { 0x20, 0x0931, "ActivityTypeStationary" }, { 0x20, 0x0932, "ActivityTypeFidgeting" }, { 0x20, 0x0933, "ActivityTypeWalking" }, { 0x20, 0x0934, "ActivityTypeRunning" }, { 0x20, 0x0935, "ActivityTypeInVehicle" }, { 0x20, 0x0936, "ActivityTypeBiking" }, { 0x20, 0x0937, "ActivityTypeIdle" }, { 0x20, 0x0940, "UnitNotSpecified" }, { 0x20, 0x0941, "UnitLux" }, { 0x20, 0x0942, "UnitDegreesKelvin" }, { 0x20, 0x0943, "UnitDegreesCelsius" }, { 0x20, 0x0944, "UnitPascal" }, { 0x20, 0x0945, "UnitNewton" }, { 0x20, 0x0946, "UnitMetersSecond" }, { 0x20, 0x0947, "UnitKilogram" }, { 0x20, 0x0948, "UnitMeter" }, { 0x20, 0x0949, "UnitMetersSecondSecond" }, { 0x20, 0x094a, "UnitFarad" }, { 0x20, 0x094b, "UnitAmpere" }, { 0x20, 0x094c, "UnitWatt" }, { 0x20, 0x094d, "UnitHenry" }, { 0x20, 0x094e, "UnitOhm" }, { 0x20, 0x094f, "UnitVolt" }, { 0x20, 0x0950, "UnitHertz" }, { 0x20, 0x0951, "UnitBar" }, { 0x20, 0x0952, "UnitDegreesAnticlockwise" }, { 0x20, 0x0953, "UnitDegreesClockwise" }, { 0x20, 0x0954, "UnitDegrees" }, { 0x20, 0x0955, "UnitDegreesSecond" }, { 0x20, 0x0956, "UnitDegreesSecondSecond" }, { 0x20, 0x0957, "UnitKnot" }, { 0x20, 0x0958, "UnitPercent" }, { 0x20, 0x0959, "UnitSecond" }, { 0x20, 0x095a, "UnitMillisecond" }, { 0x20, 0x095b, "UnitG" }, { 0x20, 0x095c, "UnitBytes" }, { 0x20, 0x095d, "UnitMilligauss" }, { 0x20, 0x095e, "UnitBits" }, { 0x20, 0x0960, "ActivityStateNoStateChange" }, { 0x20, 0x0961, 
"ActivityStateStartActivity" }, { 0x20, 0x0962, "ActivityStateEndActivity" }, { 0x20, 0x0970, "Exponent0" }, { 0x20, 0x0971, "Exponent1" }, { 0x20, 0x0972, "Exponent2" }, { 0x20, 0x0973, "Exponent3" }, { 0x20, 0x0974, "Exponent4" }, { 0x20, 0x0975, "Exponent5" }, { 0x20, 0x0976, "Exponent6" }, { 0x20, 0x0977, "Exponent7" }, { 0x20, 0x0978, "Exponent8" }, { 0x20, 0x0979, "Exponent9" }, { 0x20, 0x097a, "ExponentA" }, { 0x20, 0x097b, "ExponentB" }, { 0x20, 0x097c, "ExponentC" }, { 0x20, 0x097d, "ExponentD" }, { 0x20, 0x097e, "ExponentE" }, { 0x20, 0x097f, "ExponentF" }, { 0x20, 0x0980, "DevicePositionUnknown" }, { 0x20, 0x0981, "DevicePositionUnchanged" }, { 0x20, 0x0982, "DevicePositionOnDesk" }, { 0x20, 0x0983, "DevicePositionInHand" }, { 0x20, 0x0984, "DevicePositionMovinginBag" }, { 0x20, 0x0985, "DevicePositionStationaryinBag" }, { 0x20, 0x0990, "StepTypeUnknown" }, { 0x20, 0x0991, "StepTypeWalking" }, { 0x20, 0x0992, "StepTypeRunning" }, { 0x20, 0x09a0, "GestureStateUnknown" }, { 0x20, 0x09a1, "GestureStateStarted" }, { 0x20, 0x09a2, "GestureStateCompleted" }, { 0x20, 0x09a3, "GestureStateCancelled" }, { 0x20, 0x09b0, "HingeFoldContributingPanelUnknown" }, { 0x20, 0x09b1, "HingeFoldContributingPanelPanel1" }, { 0x20, 0x09b2, "HingeFoldContributingPanelPanel2" }, { 0x20, 0x09b3, "HingeFoldContributingPanelBoth" }, { 0x20, 0x09b4, "HingeFoldTypeUnknown" }, { 0x20, 0x09b5, "HingeFoldTypeIncreasing" }, { 0x20, 0x09b6, "HingeFoldTypeDecreasing" }, { 0x20, 0x09c0, "HumanPresenceDetectionTypeVendorDefinedNonBiometric" }, { 0x20, 0x09c1, "HumanPresenceDetectionTypeVendorDefinedBiometric" }, { 0x20, 0x09c2, "HumanPresenceDetectionTypeFacialBiometric" }, { 0x20, 0x09c3, "HumanPresenceDetectionTypeAudioBiometric" }, { 0x20, 0x1000, "ModifierChangeSensitivityAbsolute" }, { 0x20, 0x2000, "ModifierMaximum" }, { 0x20, 0x3000, "ModifierMinimum" }, { 0x20, 0x4000, "ModifierAccuracy" }, { 0x20, 0x5000, "ModifierResolution" }, { 0x20, 0x6000, "ModifierThresholdHigh" }, { 0x20, 0x7000, "ModifierThresholdLow" }, { 0x20, 0x8000, "ModifierCalibrationOffset" }, { 0x20, 0x9000, "ModifierCalibrationMultiplier" }, { 0x20, 0xa000, "ModifierReportInterval" }, { 0x20, 0xb000, "ModifierFrequencyMax" }, { 0x20, 0xc000, "ModifierPeriodMax" }, { 0x20, 0xd000, "ModifierChangeSensitivityPercentofRange" }, { 0x20, 0xe000, "ModifierChangeSensitivityPercentRelative" }, { 0x20, 0xf000, "ModifierVendorReserved" }, { 0x40, 0, "MedicalInstrument" }, { 0x40, 0x0001, "MedicalUltrasound" }, { 0x40, 0x0020, "VCRAcquisition" }, { 0x40, 0x0021, "FreezeThaw" }, { 0x40, 0x0022, "ClipStore" }, { 0x40, 0x0023, "Update" }, { 0x40, 0x0024, "Next" }, { 0x40, 0x0025, "Save" }, { 0x40, 0x0026, "Print" }, { 0x40, 0x0027, "MicrophoneEnable" }, { 0x40, 0x0040, "Cine" }, { 0x40, 0x0041, "TransmitPower" }, { 0x40, 0x0042, "Volume" }, { 0x40, 0x0043, "Focus" }, { 0x40, 0x0044, "Depth" }, { 0x40, 0x0060, "SoftStepPrimary" }, { 0x40, 0x0061, "SoftStepSecondary" }, { 0x40, 0x0070, "DepthGainCompensation" }, { 0x40, 0x0080, "ZoomSelect" }, { 0x40, 0x0081, "ZoomAdjust" }, { 0x40, 0x0082, "SpectralDopplerModeSelect" }, { 0x40, 0x0083, "SpectralDopplerAdjust" }, { 0x40, 0x0084, "ColorDopplerModeSelect" }, { 0x40, 0x0085, "ColorDopplerAdjust" }, { 0x40, 0x0086, "MotionModeSelect" }, { 0x40, 0x0087, "MotionModeAdjust" }, { 0x40, 0x0088, "2DModeSelect" }, { 0x40, 0x0089, "2DModeAdjust" }, { 0x40, 0x00a0, "SoftControlSelect" }, { 0x40, 0x00a1, "SoftControlAdjust" }, { 0x41, 0, "BrailleDisplay" }, { 0x41, 0x0001, "BrailleDisplay" }, { 0x41, 0x0002, "BrailleRow" 
}, { 0x41, 0x0003, "8DotBrailleCell" }, { 0x41, 0x0004, "6DotBrailleCell" }, { 0x41, 0x0005, "NumberofBrailleCells" }, { 0x41, 0x0006, "ScreenReaderControl" }, { 0x41, 0x0007, "ScreenReaderIdentifier" }, { 0x41, 0x00fa, "RouterSet1" }, { 0x41, 0x00fb, "RouterSet2" }, { 0x41, 0x00fc, "RouterSet3" }, { 0x41, 0x0100, "RouterKey" }, { 0x41, 0x0101, "RowRouterKey" }, { 0x41, 0x0200, "BrailleButtons" }, { 0x41, 0x0201, "BrailleKeyboardDot1" }, { 0x41, 0x0202, "BrailleKeyboardDot2" }, { 0x41, 0x0203, "BrailleKeyboardDot3" }, { 0x41, 0x0204, "BrailleKeyboardDot4" }, { 0x41, 0x0205, "BrailleKeyboardDot5" }, { 0x41, 0x0206, "BrailleKeyboardDot6" }, { 0x41, 0x0207, "BrailleKeyboardDot7" }, { 0x41, 0x0208, "BrailleKeyboardDot8" }, { 0x41, 0x0209, "BrailleKeyboardSpace" }, { 0x41, 0x020a, "BrailleKeyboardLeftSpace" }, { 0x41, 0x020b, "BrailleKeyboardRightSpace" }, { 0x41, 0x020c, "BrailleFaceControls" }, { 0x41, 0x020d, "BrailleLeftControls" }, { 0x41, 0x020e, "BrailleRightControls" }, { 0x41, 0x020f, "BrailleTopControls" }, { 0x41, 0x0210, "BrailleJoystickCenter" }, { 0x41, 0x0211, "BrailleJoystickUp" }, { 0x41, 0x0212, "BrailleJoystickDown" }, { 0x41, 0x0213, "BrailleJoystickLeft" }, { 0x41, 0x0214, "BrailleJoystickRight" }, { 0x41, 0x0215, "BrailleDPadCenter" }, { 0x41, 0x0216, "BrailleDPadUp" }, { 0x41, 0x0217, "BrailleDPadDown" }, { 0x41, 0x0218, "BrailleDPadLeft" }, { 0x41, 0x0219, "BrailleDPadRight" }, { 0x41, 0x021a, "BraillePanLeft" }, { 0x41, 0x021b, "BraillePanRight" }, { 0x41, 0x021c, "BrailleRockerUp" }, { 0x41, 0x021d, "BrailleRockerDown" }, { 0x41, 0x021e, "BrailleRockerPress" }, { 0x59, 0, "LightingAndIllumination" }, { 0x59, 0x0001, "LampArray" }, { 0x59, 0x0002, "LampArrayAttributesReport" }, { 0x59, 0x0003, "LampCount" }, { 0x59, 0x0004, "BoundingBoxWidthInMicrometers" }, { 0x59, 0x0005, "BoundingBoxHeightInMicrometers" }, { 0x59, 0x0006, "BoundingBoxDepthInMicrometers" }, { 0x59, 0x0007, "LampArrayKind" }, { 0x59, 0x0008, "MinUpdateIntervalInMicroseconds" }, { 0x59, 0x0020, "LampAttributesRequestReport" }, { 0x59, 0x0021, "LampId" }, { 0x59, 0x0022, "LampAttributesResponseReport" }, { 0x59, 0x0023, "PositionXInMicrometers" }, { 0x59, 0x0024, "PositionYInMicrometers" }, { 0x59, 0x0025, "PositionZInMicrometers" }, { 0x59, 0x0026, "LampPurposes" }, { 0x59, 0x0027, "UpdateLatencyInMicroseconds" }, { 0x59, 0x0028, "RedLevelCount" }, { 0x59, 0x0029, "GreenLevelCount" }, { 0x59, 0x002a, "BlueLevelCount" }, { 0x59, 0x002b, "IntensityLevelCount" }, { 0x59, 0x002c, "IsProgrammable" }, { 0x59, 0x002d, "InputBinding" }, { 0x59, 0x0050, "LampMultiUpdateReport" }, { 0x59, 0x0051, "RedUpdateChannel" }, { 0x59, 0x0052, "GreenUpdateChannel" }, { 0x59, 0x0053, "BlueUpdateChannel" }, { 0x59, 0x0054, "IntensityUpdateChannel" }, { 0x59, 0x0055, "LampUpdateFlags" }, { 0x59, 0x0060, "LampRangeUpdateReport" }, { 0x59, 0x0061, "LampIdStart" }, { 0x59, 0x0062, "LampIdEnd" }, { 0x59, 0x0070, "LampArrayControlReport" }, { 0x59, 0x0071, "AutonomousMode" }, { 0x80, 0, "Monitor" }, { 0x80, 0x0001, "MonitorControl" }, { 0x80, 0x0002, "EDIDInformation" }, { 0x80, 0x0003, "VDIFInformation" }, { 0x80, 0x0004, "VESAVersion" }, { 0x81, 0, "MonitorEnumerated" }, { 0x82, 0, "VESAVirtualControls" }, { 0x82, 0x0001, "Degauss" }, { 0x82, 0x0010, "Brightness" }, { 0x82, 0x0012, "Contrast" }, { 0x82, 0x0016, "RedVideoGain" }, { 0x82, 0x0018, "GreenVideoGain" }, { 0x82, 0x001a, "BlueVideoGain" }, { 0x82, 0x001c, "Focus" }, { 0x82, 0x0020, "HorizontalPosition" }, { 0x82, 0x0022, "HorizontalSize" }, { 0x82, 0x0024, 
"HorizontalPincushion" }, { 0x82, 0x0026, "HorizontalPincushionBalance" }, { 0x82, 0x0028, "HorizontalMisconvergence" }, { 0x82, 0x002a, "HorizontalLinearity" }, { 0x82, 0x002c, "HorizontalLinearityBalance" }, { 0x82, 0x0030, "VerticalPosition" }, { 0x82, 0x0032, "VerticalSize" }, { 0x82, 0x0034, "VerticalPincushion" }, { 0x82, 0x0036, "VerticalPincushionBalance" }, { 0x82, 0x0038, "VerticalMisconvergence" }, { 0x82, 0x003a, "VerticalLinearity" }, { 0x82, 0x003c, "VerticalLinearityBalance" }, { 0x82, 0x0040, "ParallelogramDistortionKeyBalance" }, { 0x82, 0x0042, "TrapezoidalDistortionKey" }, { 0x82, 0x0044, "TiltRotation" }, { 0x82, 0x0046, "TopCornerDistortionControl" }, { 0x82, 0x0048, "TopCornerDistortionBalance" }, { 0x82, 0x004a, "BottomCornerDistortionControl" }, { 0x82, 0x004c, "BottomCornerDistortionBalance" }, { 0x82, 0x0056, "HorizontalMoire" }, { 0x82, 0x0058, "VerticalMoire" }, { 0x82, 0x005e, "InputLevelSelect" }, { 0x82, 0x0060, "InputSourceSelect" }, { 0x82, 0x006c, "RedVideoBlackLevel" }, { 0x82, 0x006e, "GreenVideoBlackLevel" }, { 0x82, 0x0070, "BlueVideoBlackLevel" }, { 0x82, 0x00a2, "AutoSizeCenter" }, { 0x82, 0x00a4, "PolarityHorizontalSynchronization" }, { 0x82, 0x00a6, "PolarityVerticalSynchronization" }, { 0x82, 0x00a8, "SynchronizationType" }, { 0x82, 0x00aa, "ScreenOrientation" }, { 0x82, 0x00ac, "HorizontalFrequency" }, { 0x82, 0x00ae, "VerticalFrequency" }, { 0x82, 0x00b0, "Settings" }, { 0x82, 0x00ca, "OnScreenDisplay" }, { 0x82, 0x00d4, "StereoMode" }, { 0x84, 0, "Power" }, { 0x84, 0x0001, "iName" }, { 0x84, 0x0002, "PresentStatus" }, { 0x84, 0x0003, "ChangedStatus" }, { 0x84, 0x0004, "UPS" }, { 0x84, 0x0005, "PowerSupply" }, { 0x84, 0x0010, "BatterySystem" }, { 0x84, 0x0011, "BatterySystemId" }, { 0x84, 0x0012, "Battery" }, { 0x84, 0x0013, "BatteryId" }, { 0x84, 0x0014, "Charger" }, { 0x84, 0x0015, "ChargerId" }, { 0x84, 0x0016, "PowerConverter" }, { 0x84, 0x0017, "PowerConverterId" }, { 0x84, 0x0018, "OutletSystem" }, { 0x84, 0x0019, "OutletSystemId" }, { 0x84, 0x001a, "Input" }, { 0x84, 0x001b, "InputId" }, { 0x84, 0x001c, "Output" }, { 0x84, 0x001d, "OutputId" }, { 0x84, 0x001e, "Flow" }, { 0x84, 0x001f, "FlowId" }, { 0x84, 0x0020, "Outlet" }, { 0x84, 0x0021, "OutletId" }, { 0x84, 0x0022, "Gang" }, { 0x84, 0x0023, "GangId" }, { 0x84, 0x0024, "PowerSummary" }, { 0x84, 0x0025, "PowerSummaryId" }, { 0x84, 0x0030, "Voltage" }, { 0x84, 0x0031, "Current" }, { 0x84, 0x0032, "Frequency" }, { 0x84, 0x0033, "ApparentPower" }, { 0x84, 0x0034, "ActivePower" }, { 0x84, 0x0035, "PercentLoad" }, { 0x84, 0x0036, "Temperature" }, { 0x84, 0x0037, "Humidity" }, { 0x84, 0x0038, "BadCount" }, { 0x84, 0x0040, "ConfigVoltage" }, { 0x84, 0x0041, "ConfigCurrent" }, { 0x84, 0x0042, "ConfigFrequency" }, { 0x84, 0x0043, "ConfigApparentPower" }, { 0x84, 0x0044, "ConfigActivePower" }, { 0x84, 0x0045, "ConfigPercentLoad" }, { 0x84, 0x0046, "ConfigTemperature" }, { 0x84, 0x0047, "ConfigHumidity" }, { 0x84, 0x0050, "SwitchOnControl" }, { 0x84, 0x0051, "SwitchOffControl" }, { 0x84, 0x0052, "ToggleControl" }, { 0x84, 0x0053, "LowVoltageTransfer" }, { 0x84, 0x0054, "HighVoltageTransfer" }, { 0x84, 0x0055, "DelayBeforeReboot" }, { 0x84, 0x0056, "DelayBeforeStartup" }, { 0x84, 0x0057, "DelayBeforeShutdown" }, { 0x84, 0x0058, "Test" }, { 0x84, 0x0059, "ModuleReset" }, { 0x84, 0x005a, "AudibleAlarmControl" }, { 0x84, 0x0060, "Present" }, { 0x84, 0x0061, "Good" }, { 0x84, 0x0062, "InternalFailure" }, { 0x84, 0x0063, "VoltagOutOfRange" }, { 0x84, 0x0064, "FrequencyOutOfRange" }, { 0x84, 0x0065, 
"Overload" }, { 0x84, 0x0066, "OverCharged" }, { 0x84, 0x0067, "OverTemperature" }, { 0x84, 0x0068, "ShutdownRequested" }, { 0x84, 0x0069, "ShutdownImminent" }, { 0x84, 0x006b, "SwitchOnOff" }, { 0x84, 0x006c, "Switchable" }, { 0x84, 0x006d, "Used" }, { 0x84, 0x006e, "Boost" }, { 0x84, 0x006f, "Buck" }, { 0x84, 0x0070, "Initialized" }, { 0x84, 0x0071, "Tested" }, { 0x84, 0x0072, "AwaitingPower" }, { 0x84, 0x0073, "CommunicationLost" }, { 0x84, 0x00fd, "iManufacturer" }, { 0x84, 0x00fe, "iProduct" }, { 0x84, 0x00ff, "iSerialNumber" }, { 0x85, 0, "BatterySystem" }, { 0x85, 0x0001, "SmartBatteryBatteryMode" }, { 0x85, 0x0002, "SmartBatteryBatteryStatus" }, { 0x85, 0x0003, "SmartBatteryAlarmWarning" }, { 0x85, 0x0004, "SmartBatteryChargerMode" }, { 0x85, 0x0005, "SmartBatteryChargerStatus" }, { 0x85, 0x0006, "SmartBatteryChargerSpecInfo" }, { 0x85, 0x0007, "SmartBatterySelectorState" }, { 0x85, 0x0008, "SmartBatterySelectorPresets" }, { 0x85, 0x0009, "SmartBatterySelectorInfo" }, { 0x85, 0x0010, "OptionalMfgFunction1" }, { 0x85, 0x0011, "OptionalMfgFunction2" }, { 0x85, 0x0012, "OptionalMfgFunction3" }, { 0x85, 0x0013, "OptionalMfgFunction4" }, { 0x85, 0x0014, "OptionalMfgFunction5" }, { 0x85, 0x0015, "ConnectionToSMBus" }, { 0x85, 0x0016, "OutputConnection" }, { 0x85, 0x0017, "ChargerConnection" }, { 0x85, 0x0018, "BatteryInsertion" }, { 0x85, 0x0019, "UseNext" }, { 0x85, 0x001a, "OKToUse" }, { 0x85, 0x001b, "BatterySupported" }, { 0x85, 0x001c, "SelectorRevision" }, { 0x85, 0x001d, "ChargingIndicator" }, { 0x85, 0x0028, "ManufacturerAccess" }, { 0x85, 0x0029, "RemainingCapacityLimit" }, { 0x85, 0x002a, "RemainingTimeLimit" }, { 0x85, 0x002b, "AtRate" }, { 0x85, 0x002c, "CapacityMode" }, { 0x85, 0x002d, "BroadcastToCharger" }, { 0x85, 0x002e, "PrimaryBattery" }, { 0x85, 0x002f, "ChargeController" }, { 0x85, 0x0040, "TerminateCharge" }, { 0x85, 0x0041, "TerminateDischarge" }, { 0x85, 0x0042, "BelowRemainingCapacityLimit" }, { 0x85, 0x0043, "RemainingTimeLimitExpired" }, { 0x85, 0x0044, "Charging" }, { 0x85, 0x0045, "Discharging" }, { 0x85, 0x0046, "FullyCharged" }, { 0x85, 0x0047, "FullyDischarged" }, { 0x85, 0x0048, "ConditioningFlag" }, { 0x85, 0x0049, "AtRateOK" }, { 0x85, 0x004a, "SmartBatteryErrorCode" }, { 0x85, 0x004b, "NeedReplacement" }, { 0x85, 0x0060, "AtRateTimeToFull" }, { 0x85, 0x0061, "AtRateTimeToEmpty" }, { 0x85, 0x0062, "AverageCurrent" }, { 0x85, 0x0063, "MaxError" }, { 0x85, 0x0064, "RelativeStateOfCharge" }, { 0x85, 0x0065, "AbsoluteStateOfCharge" }, { 0x85, 0x0066, "RemainingCapacity" }, { 0x85, 0x0067, "FullChargeCapacity" }, { 0x85, 0x0068, "RunTimeToEmpty" }, { 0x85, 0x0069, "AverageTimeToEmpty" }, { 0x85, 0x006a, "AverageTimeToFull" }, { 0x85, 0x006b, "CycleCount" }, { 0x85, 0x0080, "BatteryPackModelLevel" }, { 0x85, 0x0081, "InternalChargeController" }, { 0x85, 0x0082, "PrimaryBatterySupport" }, { 0x85, 0x0083, "DesignCapacity" }, { 0x85, 0x0084, "SpecificationInfo" }, { 0x85, 0x0085, "ManufactureDate" }, { 0x85, 0x0086, "SerialNumber" }, { 0x85, 0x0087, "iManufacturerName" }, { 0x85, 0x0088, "iDeviceName" }, { 0x85, 0x0089, "iDeviceChemistry" }, { 0x85, 0x008a, "ManufacturerData" }, { 0x85, 0x008b, "Rechargable" }, { 0x85, 0x008c, "WarningCapacityLimit" }, { 0x85, 0x008d, "CapacityGranularity1" }, { 0x85, 0x008e, "CapacityGranularity2" }, { 0x85, 0x008f, "iOEMInformation" }, { 0x85, 0x00c0, "InhibitCharge" }, { 0x85, 0x00c1, "EnablePolling" }, { 0x85, 0x00c2, "ResetToZero" }, { 0x85, 0x00d0, "ACPresent" }, { 0x85, 0x00d1, "BatteryPresent" }, { 0x85, 0x00d2, 
"PowerFail" }, { 0x85, 0x00d3, "AlarmInhibited" }, { 0x85, 0x00d4, "ThermistorUnderRange" }, { 0x85, 0x00d5, "ThermistorHot" }, { 0x85, 0x00d6, "ThermistorCold" }, { 0x85, 0x00d7, "ThermistorOverRange" }, { 0x85, 0x00d8, "VoltageOutOfRange" }, { 0x85, 0x00d9, "CurrentOutOfRange" }, { 0x85, 0x00da, "CurrentNotRegulated" }, { 0x85, 0x00db, "VoltageNotRegulated" }, { 0x85, 0x00dc, "MasterMode" }, { 0x85, 0x00f0, "ChargerSelectorSupport" }, { 0x85, 0x00f1, "ChargerSpec" }, { 0x85, 0x00f2, "Level2" }, { 0x85, 0x00f3, "Level3" }, { 0x8c, 0, "BarcodeScanner" }, { 0x8c, 0x0001, "BarcodeBadgeReader" }, { 0x8c, 0x0002, "BarcodeScanner" }, { 0x8c, 0x0003, "DumbBarCodeScanner" }, { 0x8c, 0x0004, "CordlessScannerBase" }, { 0x8c, 0x0005, "BarCodeScannerCradle" }, { 0x8c, 0x0010, "AttributeReport" }, { 0x8c, 0x0011, "SettingsReport" }, { 0x8c, 0x0012, "ScannedDataReport" }, { 0x8c, 0x0013, "RawScannedDataReport" }, { 0x8c, 0x0014, "TriggerReport" }, { 0x8c, 0x0015, "StatusReport" }, { 0x8c, 0x0016, "UPCEANControlReport" }, { 0x8c, 0x0017, "EAN23LabelControlReport" }, { 0x8c, 0x0018, "Code39ControlReport" }, { 0x8c, 0x0019, "Interleaved2of5ControlReport" }, { 0x8c, 0x001a, "Standard2of5ControlReport" }, { 0x8c, 0x001b, "MSIPlesseyControlReport" }, { 0x8c, 0x001c, "CodabarControlReport" }, { 0x8c, 0x001d, "Code128ControlReport" }, { 0x8c, 0x001e, "Misc1DControlReport" }, { 0x8c, 0x001f, "2DControlReport" }, { 0x8c, 0x0030, "AimingPointerMode" }, { 0x8c, 0x0031, "BarCodePresentSensor" }, { 0x8c, 0x0032, "Class1ALaser" }, { 0x8c, 0x0033, "Class2Laser" }, { 0x8c, 0x0034, "HeaterPresent" }, { 0x8c, 0x0035, "ContactScanner" }, { 0x8c, 0x0036, "ElectronicArticleSurveillanceNotification" }, { 0x8c, 0x0037, "ConstantElectronicArticleSurveillance" }, { 0x8c, 0x0038, "ErrorIndication" }, { 0x8c, 0x0039, "FixedBeeper" }, { 0x8c, 0x003a, "GoodDecodeIndication" }, { 0x8c, 0x003b, "HandsFreeScanning" }, { 0x8c, 0x003c, "IntrinsicallySafe" }, { 0x8c, 0x003d, "KlasseEinsLaser" }, { 0x8c, 0x003e, "LongRangeScanner" }, { 0x8c, 0x003f, "MirrorSpeedControl" }, { 0x8c, 0x0040, "NotOnFileIndication" }, { 0x8c, 0x0041, "ProgrammableBeeper" }, { 0x8c, 0x0042, "Triggerless" }, { 0x8c, 0x0043, "Wand" }, { 0x8c, 0x0044, "WaterResistant" }, { 0x8c, 0x0045, "MultiRangeScanner" }, { 0x8c, 0x0046, "ProximitySensor" }, { 0x8c, 0x004d, "FragmentDecoding" }, { 0x8c, 0x004e, "ScannerReadConfidence" }, { 0x8c, 0x004f, "DataPrefix" }, { 0x8c, 0x0050, "PrefixAIMI" }, { 0x8c, 0x0051, "PrefixNone" }, { 0x8c, 0x0052, "PrefixProprietary" }, { 0x8c, 0x0055, "ActiveTime" }, { 0x8c, 0x0056, "AimingLaserPattern" }, { 0x8c, 0x0057, "BarCodePresent" }, { 0x8c, 0x0058, "BeeperState" }, { 0x8c, 0x0059, "LaserOnTime" }, { 0x8c, 0x005a, "LaserState" }, { 0x8c, 0x005b, "LockoutTime" }, { 0x8c, 0x005c, "MotorState" }, { 0x8c, 0x005d, "MotorTimeout" }, { 0x8c, 0x005e, "PowerOnResetScanner" }, { 0x8c, 0x005f, "PreventReadofBarcodes" }, { 0x8c, 0x0060, "InitiateBarcodeRead" }, { 0x8c, 0x0061, "TriggerState" }, { 0x8c, 0x0062, "TriggerMode" }, { 0x8c, 0x0063, "TriggerModeBlinkingLaserOn" }, { 0x8c, 0x0064, "TriggerModeContinuousLaserOn" }, { 0x8c, 0x0065, "TriggerModeLaseronwhilePulled" }, { 0x8c, 0x0066, "TriggerModeLaserstaysonafterrelease" }, { 0x8c, 0x006d, "CommitParameterstoNVM" }, { 0x8c, 0x006e, "ParameterScanning" }, { 0x8c, 0x006f, "ParametersChanged" }, { 0x8c, 0x0070, "Setparameterdefaultvalues" }, { 0x8c, 0x0075, "ScannerInCradle" }, { 0x8c, 0x0076, "ScannerInRange" }, { 0x8c, 0x007a, "AimDuration" }, { 0x8c, 0x007b, "GoodReadLampDuration" }, { 0x8c, 
0x007c, "GoodReadLampIntensity" }, { 0x8c, 0x007d, "GoodReadLED" }, { 0x8c, 0x007e, "GoodReadToneFrequency" }, { 0x8c, 0x007f, "GoodReadToneLength" }, { 0x8c, 0x0080, "GoodReadToneVolume" }, { 0x8c, 0x0082, "NoReadMessage" }, { 0x8c, 0x0083, "NotonFileVolume" }, { 0x8c, 0x0084, "PowerupBeep" }, { 0x8c, 0x0085, "SoundErrorBeep" }, { 0x8c, 0x0086, "SoundGoodReadBeep" }, { 0x8c, 0x0087, "SoundNotOnFileBeep" }, { 0x8c, 0x0088, "GoodReadWhentoWrite" }, { 0x8c, 0x0089, "GRWTIAfterDecode" }, { 0x8c, 0x008a, "GRWTIBeepLampaftertransmit" }, { 0x8c, 0x008b, "GRWTINoBeepLampuseatall" }, { 0x8c, 0x0091, "BooklandEAN" }, { 0x8c, 0x0092, "ConvertEAN8to13Type" }, { 0x8c, 0x0093, "ConvertUPCAtoEAN13" }, { 0x8c, 0x0094, "ConvertUPCEtoA" }, { 0x8c, 0x0095, "EAN13" }, { 0x8c, 0x0096, "EAN8" }, { 0x8c, 0x0097, "EAN99128Mandatory" }, { 0x8c, 0x0098, "EAN99P5128Optional" }, { 0x8c, 0x0099, "EnableEANTwoLabel" }, { 0x8c, 0x009a, "UPCEAN" }, { 0x8c, 0x009b, "UPCEANCouponCode" }, { 0x8c, 0x009c, "UPCEANPeriodicals" }, { 0x8c, 0x009d, "UPCA" }, { 0x8c, 0x009e, "UPCAwith128Mandatory" }, { 0x8c, 0x009f, "UPCAwith128Optional" }, { 0x8c, 0x00a0, "UPCAwithP5Optional" }, { 0x8c, 0x00a1, "UPCE" }, { 0x8c, 0x00a2, "UPCE1" }, { 0x8c, 0x00a9, "Periodical" }, { 0x8c, 0x00aa, "PeriodicalAutoDiscriminate2" }, { 0x8c, 0x00ab, "PeriodicalOnlyDecodewith2" }, { 0x8c, 0x00ac, "PeriodicalIgnore2" }, { 0x8c, 0x00ad, "PeriodicalAutoDiscriminate5" }, { 0x8c, 0x00ae, "PeriodicalOnlyDecodewith5" }, { 0x8c, 0x00af, "PeriodicalIgnore5" }, { 0x8c, 0x00b0, "Check" }, { 0x8c, 0x00b1, "CheckDisablePrice" }, { 0x8c, 0x00b2, "CheckEnable4digitPrice" }, { 0x8c, 0x00b3, "CheckEnable5digitPrice" }, { 0x8c, 0x00b4, "CheckEnableEuropean4digitPrice" }, { 0x8c, 0x00b5, "CheckEnableEuropean5digitPrice" }, { 0x8c, 0x00b7, "EANTwoLabel" }, { 0x8c, 0x00b8, "EANThreeLabel" }, { 0x8c, 0x00b9, "EAN8FlagDigit1" }, { 0x8c, 0x00ba, "EAN8FlagDigit2" }, { 0x8c, 0x00bb, "EAN8FlagDigit3" }, { 0x8c, 0x00bc, "EAN13FlagDigit1" }, { 0x8c, 0x00bd, "EAN13FlagDigit2" }, { 0x8c, 0x00be, "EAN13FlagDigit3" }, { 0x8c, 0x00bf, "AddEAN23LabelDefinition" }, { 0x8c, 0x00c0, "ClearallEAN23LabelDefinitions" }, { 0x8c, 0x00c3, "Codabar" }, { 0x8c, 0x00c4, "Code128" }, { 0x8c, 0x00c7, "Code39" }, { 0x8c, 0x00c8, "Code93" }, { 0x8c, 0x00c9, "FullASCIIConversion" }, { 0x8c, 0x00ca, "Interleaved2of5" }, { 0x8c, 0x00cb, "ItalianPharmacyCode" }, { 0x8c, 0x00cc, "MSIPlessey" }, { 0x8c, 0x00cd, "Standard2of5IATA" }, { 0x8c, 0x00ce, "Standard2of5" }, { 0x8c, 0x00d3, "TransmitStartStop" }, { 0x8c, 0x00d4, "TriOptic" }, { 0x8c, 0x00d5, "UCCEAN128" }, { 0x8c, 0x00d6, "CheckDigit" }, { 0x8c, 0x00d7, "CheckDigitDisable" }, { 0x8c, 0x00d8, "CheckDigitEnableInterleaved2of5OPCC" }, { 0x8c, 0x00d9, "CheckDigitEnableInterleaved2of5USS" }, { 0x8c, 0x00da, "CheckDigitEnableStandard2of5OPCC" }, { 0x8c, 0x00db, "CheckDigitEnableStandard2of5USS" }, { 0x8c, 0x00dc, "CheckDigitEnableOneMSIPlessey" }, { 0x8c, 0x00dd, "CheckDigitEnableTwoMSIPlessey" }, { 0x8c, 0x00de, "CheckDigitCodabarEnable" }, { 0x8c, 0x00df, "CheckDigitCode39Enable" }, { 0x8c, 0x00f0, "TransmitCheckDigit" }, { 0x8c, 0x00f1, "DisableCheckDigitTransmit" }, { 0x8c, 0x00f2, "EnableCheckDigitTransmit" }, { 0x8c, 0x00fb, "SymbologyIdentifier1" }, { 0x8c, 0x00fc, "SymbologyIdentifier2" }, { 0x8c, 0x00fd, "SymbologyIdentifier3" }, { 0x8c, 0x00fe, "DecodedData" }, { 0x8c, 0x00ff, "DecodeDataContinued" }, { 0x8c, 0x0100, "BarSpaceData" }, { 0x8c, 0x0101, "ScannerDataAccuracy" }, { 0x8c, 0x0102, "RawDataPolarity" }, { 0x8c, 0x0103, 
"PolarityInvertedBarCode" }, { 0x8c, 0x0104, "PolarityNormalBarCode" }, { 0x8c, 0x0106, "MinimumLengthtoDecode" }, { 0x8c, 0x0107, "MaximumLengthtoDecode" }, { 0x8c, 0x0108, "DiscreteLengthtoDecode1" }, { 0x8c, 0x0109, "DiscreteLengthtoDecode2" }, { 0x8c, 0x010a, "DataLengthMethod" }, { 0x8c, 0x010b, "DLMethodReadany" }, { 0x8c, 0x010c, "DLMethodCheckinRange" }, { 0x8c, 0x010d, "DLMethodCheckforDiscrete" }, { 0x8c, 0x0110, "AztecCode" }, { 0x8c, 0x0111, "BC412" }, { 0x8c, 0x0112, "ChannelCode" }, { 0x8c, 0x0113, "Code16" }, { 0x8c, 0x0114, "Code32" }, { 0x8c, 0x0115, "Code49" }, { 0x8c, 0x0116, "CodeOne" }, { 0x8c, 0x0117, "Colorcode" }, { 0x8c, 0x0118, "DataMatrix" }, { 0x8c, 0x0119, "MaxiCode" }, { 0x8c, 0x011a, "MicroPDF" }, { 0x8c, 0x011b, "PDF417" }, { 0x8c, 0x011c, "PosiCode" }, { 0x8c, 0x011d, "QRCode" }, { 0x8c, 0x011e, "SuperCode" }, { 0x8c, 0x011f, "UltraCode" }, { 0x8c, 0x0120, "USD5SlugCode" }, { 0x8c, 0x0121, "VeriCode" }, { 0x8d, 0, "Scales" }, { 0x8d, 0x0001, "Scales" }, { 0x8d, 0x0020, "ScaleDevice" }, { 0x8d, 0x0021, "ScaleClass" }, { 0x8d, 0x0022, "ScaleClassIMetric" }, { 0x8d, 0x0023, "ScaleClassIIMetric" }, { 0x8d, 0x0024, "ScaleClassIIIMetric" }, { 0x8d, 0x0025, "ScaleClassIIILMetric" }, { 0x8d, 0x0026, "ScaleClassIVMetric" }, { 0x8d, 0x0027, "ScaleClassIIIEnglish" }, { 0x8d, 0x0028, "ScaleClassIIILEnglish" }, { 0x8d, 0x0029, "ScaleClassIVEnglish" }, { 0x8d, 0x002a, "ScaleClassGeneric" }, { 0x8d, 0x0030, "ScaleAttributeReport" }, { 0x8d, 0x0031, "ScaleControlReport" }, { 0x8d, 0x0032, "ScaleDataReport" }, { 0x8d, 0x0033, "ScaleStatusReport" }, { 0x8d, 0x0034, "ScaleWeightLimitReport" }, { 0x8d, 0x0035, "ScaleStatisticsReport" }, { 0x8d, 0x0040, "DataWeight" }, { 0x8d, 0x0041, "DataScaling" }, { 0x8d, 0x0050, "WeightUnit" }, { 0x8d, 0x0051, "WeightUnitMilligram" }, { 0x8d, 0x0052, "WeightUnitGram" }, { 0x8d, 0x0053, "WeightUnitKilogram" }, { 0x8d, 0x0054, "WeightUnitCarats" }, { 0x8d, 0x0055, "WeightUnitTaels" }, { 0x8d, 0x0056, "WeightUnitGrains" }, { 0x8d, 0x0057, "WeightUnitPennyweights" }, { 0x8d, 0x0058, "WeightUnitMetricTon" }, { 0x8d, 0x0059, "WeightUnitAvoirTon" }, { 0x8d, 0x005a, "WeightUnitTroyOunce" }, { 0x8d, 0x005b, "WeightUnitOunce" }, { 0x8d, 0x005c, "WeightUnitPound" }, { 0x8d, 0x0060, "CalibrationCount" }, { 0x8d, 0x0061, "ReZeroCount" }, { 0x8d, 0x0070, "ScaleStatus" }, { 0x8d, 0x0071, "ScaleStatusFault" }, { 0x8d, 0x0072, "ScaleStatusStableatCenterofZero" }, { 0x8d, 0x0073, "ScaleStatusInMotion" }, { 0x8d, 0x0074, "ScaleStatusWeightStable" }, { 0x8d, 0x0075, "ScaleStatusUnderZero" }, { 0x8d, 0x0076, "ScaleStatusOverWeightLimit" }, { 0x8d, 0x0077, "ScaleStatusRequiresCalibration" }, { 0x8d, 0x0078, "ScaleStatusRequiresRezeroing" }, { 0x8d, 0x0080, "ZeroScale" }, { 0x8d, 0x0081, "EnforcedZeroReturn" }, { 0x8e, 0, "MagneticStripeReader" }, { 0x8e, 0x0001, "MSRDeviceReadOnly" }, { 0x8e, 0x0011, "Track1Length" }, { 0x8e, 0x0012, "Track2Length" }, { 0x8e, 0x0013, "Track3Length" }, { 0x8e, 0x0014, "TrackJISLength" }, { 0x8e, 0x0020, "TrackData" }, { 0x8e, 0x0021, "Track1Data" }, { 0x8e, 0x0022, "Track2Data" }, { 0x8e, 0x0023, "Track3Data" }, { 0x8e, 0x0024, "TrackJISData" }, { 0x90, 0, "CameraControl" }, { 0x90, 0x0020, "CameraAutofocus" }, { 0x90, 0x0021, "CameraShutter" }, { 0x91, 0, "Arcade" }, { 0x91, 0x0001, "GeneralPurposeIOCard" }, { 0x91, 0x0002, "CoinDoor" }, { 0x91, 0x0003, "WatchdogTimer" }, { 0x91, 0x0030, "GeneralPurposeAnalogInputState" }, { 0x91, 0x0031, "GeneralPurposeDigitalInputState" }, { 0x91, 0x0032, "GeneralPurposeOpticalInputState" }, 
{ 0x91, 0x0033, "GeneralPurposeDigitalOutputState" }, { 0x91, 0x0034, "NumberofCoinDoors" }, { 0x91, 0x0035, "CoinDrawerDropCount" }, { 0x91, 0x0036, "CoinDrawerStart" }, { 0x91, 0x0037, "CoinDrawerService" }, { 0x91, 0x0038, "CoinDrawerTilt" }, { 0x91, 0x0039, "CoinDoorTest" }, { 0x91, 0x0040, "CoinDoorLockout" }, { 0x91, 0x0041, "WatchdogTimeout" }, { 0x91, 0x0042, "WatchdogAction" }, { 0x91, 0x0043, "WatchdogReboot" }, { 0x91, 0x0044, "WatchdogRestart" }, { 0x91, 0x0045, "AlarmInput" }, { 0x91, 0x0046, "CoinDoorCounter" }, { 0x91, 0x0047, "IODirectionMapping" }, { 0x91, 0x0048, "SetIODirectionMapping" }, { 0x91, 0x0049, "ExtendedOpticalInputState" }, { 0x91, 0x004a, "PinPadInputState" }, { 0x91, 0x004b, "PinPadStatus" }, { 0x91, 0x004c, "PinPadOutput" }, { 0x91, 0x004d, "PinPadCommand" }, { 0xf1d0, 0, "FIDOAlliance" }, { 0xf1d0, 0x0001, "U2FAuthenticatorDevice" }, { 0xf1d0, 0x0020, "InputReportData" }, { 0xf1d0, 0x0021, "OutputReportData" }, /* pages 0xff00 to 0xffff are vendor-specific */ { 0xffff, 0, "Vendor-specific-FF" }, { 0, 0, NULL } }; /* Either output directly into simple seq_file, or (if f == NULL) * allocate a separate buffer that will then be passed to the 'events' * ringbuffer. * * This is because these functions can be called both for "one-shot" * "rdesc" while resolving, or for blocking "events". * * This holds both for resolv_usage_page() and hid_resolv_usage(). */ static char *resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; char *buf = NULL; if (!f) { buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return ERR_PTR(-ENOMEM); } for (p = hid_usage_table; p->description; p++) if (p->page == page) { if (!f) { snprintf(buf, HID_DEBUG_BUFSIZE, "%s", p->description); return buf; } else { seq_printf(f, "%s", p->description); return NULL; } } if (!f) snprintf(buf, HID_DEBUG_BUFSIZE, "%04x", page); else seq_printf(f, "%04x", page); return buf; } char *hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; const struct hid_usage_entry *m; char *buf = NULL; int len = 0; const char *modifier = NULL; unsigned int usage_modifier = usage & 0xF000; unsigned int usage_actual = usage & 0xFFFF; buf = resolv_usage_page(usage >> 16, f); if (IS_ERR(buf)) { pr_err("error allocating HID debug buffer\n"); return NULL; } if (!f) { len = strlen(buf); len += scnprintf(buf + len, HID_DEBUG_BUFSIZE - len, "."); } else { seq_printf(f, "."); } for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { if (p->page == 0x20 && usage_modifier) { for (m = p; m->description; m++) { if (p->page == m->page && m->usage == usage_modifier) { modifier = m->description; break; } } if (modifier) usage_actual = usage_actual & 0x0FFF; } if (!modifier) modifier = ""; for(++p; p->description && p->usage != 0; p++) if (p->usage == usage_actual) { if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%s%s", p->description, modifier); else seq_printf(f, "%s%s", p->description, modifier); return buf; } break; } if (!f) snprintf(buf + len, HID_DEBUG_BUFSIZE - len, "%04x", usage & 0xffff); else seq_printf(f, "%04x", usage & 0xffff); return buf; } EXPORT_SYMBOL_GPL(hid_resolv_usage); static void tab(int n, struct seq_file *f) { seq_printf(f, "%*s", n, ""); } void hid_dump_field(struct hid_field *field, int n, struct seq_file *f) { int j; if (field->physical) { tab(n, f); seq_printf(f, "Physical("); hid_resolv_usage(field->physical, f); seq_printf(f, ")\n"); } if (field->logical) { tab(n, f); seq_printf(f, "Logical("); 
hid_resolv_usage(field->logical, f); seq_printf(f, ")\n"); } if (field->application) { tab(n, f); seq_printf(f, "Application("); hid_resolv_usage(field->application, f); seq_printf(f, ")\n"); } tab(n, f); seq_printf(f, "Usage(%d)\n", field->maxusage); for (j = 0; j < field->maxusage; j++) { tab(n+2, f); hid_resolv_usage(field->usage[j].hid, f); seq_printf(f, "\n"); } if (field->logical_minimum != field->logical_maximum) { tab(n, f); seq_printf(f, "Logical Minimum(%d)\n", field->logical_minimum); tab(n, f); seq_printf(f, "Logical Maximum(%d)\n", field->logical_maximum); } if (field->physical_minimum != field->physical_maximum) { tab(n, f); seq_printf(f, "Physical Minimum(%d)\n", field->physical_minimum); tab(n, f); seq_printf(f, "Physical Maximum(%d)\n", field->physical_maximum); } if (field->unit_exponent) { tab(n, f); seq_printf(f, "Unit Exponent(%d)\n", field->unit_exponent); } if (field->unit) { static const char *systems[5] = { "None", "SI Linear", "SI Rotation", "English Linear", "English Rotation" }; static const char *units[5][8] = { { "None", "None", "None", "None", "None", "None", "None", "None" }, { "None", "Centimeter", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Radians", "Gram", "Seconds", "Kelvin", "Ampere", "Candela", "None" }, { "None", "Inch", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" }, { "None", "Degrees", "Slug", "Seconds", "Fahrenheit", "Ampere", "Candela", "None" } }; int i; int sys; __u32 data = field->unit; /* First nibble tells us which system we're in. */ sys = data & 0xf; data >>= 4; if(sys > 4) { tab(n, f); seq_printf(f, "Unit(Invalid)\n"); } else { int earlier_unit = 0; tab(n, f); seq_printf(f, "Unit(%s : ", systems[sys]); for (i=1 ; i<sizeof(__u32)*2 ; i++) { char nibble = data & 0xf; data >>= 4; if (nibble != 0) { if(earlier_unit++ > 0) seq_printf(f, "*"); seq_printf(f, "%s", units[sys][i]); if(nibble != 1) { /* This is a _signed_ nibble(!) */ int val = nibble & 0x7; if(nibble & 0x08) val = -((0x7 & ~val) +1); seq_printf(f, "^%d", val); } } } seq_printf(f, ")\n"); } } tab(n, f); seq_printf(f, "Report Size(%u)\n", field->report_size); tab(n, f); seq_printf(f, "Report Count(%u)\n", field->report_count); tab(n, f); seq_printf(f, "Report Offset(%u)\n", field->report_offset); tab(n, f); seq_printf(f, "Flags( "); j = field->flags; seq_printf(f, "%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); seq_printf(f, "%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); seq_printf(f, "%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); seq_printf(f, "%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? 
"BufferedByte " : ""); seq_printf(f, ")\n"); } EXPORT_SYMBOL_GPL(hid_dump_field); void hid_dump_device(struct hid_device *device, struct seq_file *f) { struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; unsigned i,k; static const char *table[] = {"INPUT", "OUTPUT", "FEATURE"}; for (i = 0; i < HID_REPORT_TYPES; i++) { report_enum = device->report_enum + i; list = report_enum->report_list.next; while (list != &report_enum->report_list) { report = (struct hid_report *) list; tab(2, f); seq_printf(f, "%s", table[i]); if (report->id) seq_printf(f, "(%d)", report->id); seq_printf(f, "[%s]", table[report->type]); seq_printf(f, "\n"); for (k = 0; k < report->maxfield; k++) { tab(4, f); seq_printf(f, "Field(%d)\n", k); hid_dump_field(report->field[k], 6, f); } list = list->next; } } } EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); list_for_each_entry(list, &hdev->debug_list, node) kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_debug_event); void hid_dump_report(struct hid_device *hid, int type, u8 *data, int size) { struct hid_report_enum *report_enum; char *buf; unsigned int i; buf = kmalloc(HID_DEBUG_BUFSIZE, GFP_ATOMIC); if (!buf) return; report_enum = hid->report_enum + type; /* dump the report */ snprintf(buf, HID_DEBUG_BUFSIZE - 1, "\nreport (size %u) (%snumbered) = ", size, report_enum->numbered ? "" : "un"); hid_debug_event(hid, buf); for (i = 0; i < size; i++) { snprintf(buf, HID_DEBUG_BUFSIZE - 1, " %02x", data[i]); hid_debug_event(hid, buf); } hid_debug_event(hid, "\n"); kfree(buf); } EXPORT_SYMBOL_GPL(hid_dump_report); void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 value) { char *buf; int len; buf = hid_resolv_usage(usage->hid, NULL); if (!buf) return; len = strlen(buf); snprintf(buf + len, HID_DEBUG_BUFSIZE - len - 1, " = %d\n", value); hid_debug_event(hdev, buf); kfree(buf); wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); static const char *events[EV_MAX + 1] = { [EV_SYN] = "Sync", [EV_KEY] = "Key", [EV_REL] = "Relative", [EV_ABS] = "Absolute", [EV_MSC] = "Misc", [EV_LED] = "LED", [EV_SND] = "Sound", [EV_REP] = "Repeat", [EV_FF] = "ForceFeedback", [EV_PWR] = "Power", [EV_FF_STATUS] = "ForceFeedbackStatus", [EV_SW] = "Software", }; static const char *syncs[SYN_CNT] = { [SYN_REPORT] = "Report", [SYN_CONFIG] = "Config", [SYN_MT_REPORT] = "MT Report", [SYN_DROPPED] = "Dropped", }; static const char *keys[KEY_MAX + 1] = { [KEY_RESERVED] = "Reserved", [KEY_ESC] = "Esc", [KEY_1] = "1", [KEY_2] = "2", [KEY_3] = "3", [KEY_4] = "4", [KEY_5] = "5", [KEY_6] = "6", [KEY_7] = "7", [KEY_8] = "8", [KEY_9] = "9", [KEY_0] = "0", [KEY_MINUS] = "Minus", [KEY_EQUAL] = "Equal", [KEY_BACKSPACE] = "Backspace", [KEY_TAB] = "Tab", [KEY_Q] = "Q", [KEY_W] = "W", [KEY_E] = "E", [KEY_R] = "R", [KEY_T] = "T", [KEY_Y] = "Y", [KEY_U] = "U", [KEY_I] = "I", [KEY_O] = "O", [KEY_P] = "P", [KEY_LEFTBRACE] = "LeftBrace", [KEY_RIGHTBRACE] = "RightBrace", [KEY_ENTER] = "Enter", [KEY_LEFTCTRL] = "LeftControl", [KEY_A] = "A", [KEY_S] = "S", [KEY_D] = "D", [KEY_F] = "F", [KEY_G] = "G", [KEY_H] = "H", [KEY_J] = "J", [KEY_K] = "K", [KEY_L] = "L", [KEY_SEMICOLON] = "Semicolon", [KEY_APOSTROPHE] = "Apostrophe", [KEY_GRAVE] = 
"Grave", [KEY_LEFTSHIFT] = "LeftShift", [KEY_BACKSLASH] = "BackSlash", [KEY_Z] = "Z", [KEY_X] = "X", [KEY_C] = "C", [KEY_V] = "V", [KEY_B] = "B", [KEY_N] = "N", [KEY_M] = "M", [KEY_COMMA] = "Comma", [KEY_DOT] = "Dot", [KEY_SLASH] = "Slash", [KEY_RIGHTSHIFT] = "RightShift", [KEY_KPASTERISK] = "KPAsterisk", [KEY_LEFTALT] = "LeftAlt", [KEY_SPACE] = "Space", [KEY_CAPSLOCK] = "CapsLock", [KEY_F1] = "F1", [KEY_F2] = "F2", [KEY_F3] = "F3", [KEY_F4] = "F4", [KEY_F5] = "F5", [KEY_F6] = "F6", [KEY_F7] = "F7", [KEY_F8] = "F8", [KEY_F9] = "F9", [KEY_F10] = "F10", [KEY_NUMLOCK] = "NumLock", [KEY_SCROLLLOCK] = "ScrollLock", [KEY_KP7] = "KP7", [KEY_KP8] = "KP8", [KEY_KP9] = "KP9", [KEY_KPMINUS] = "KPMinus", [KEY_KP4] = "KP4", [KEY_KP5] = "KP5", [KEY_KP6] = "KP6", [KEY_KPPLUS] = "KPPlus", [KEY_KP1] = "KP1", [KEY_KP2] = "KP2", [KEY_KP3] = "KP3", [KEY_KP0] = "KP0", [KEY_KPDOT] = "KPDot", [KEY_ZENKAKUHANKAKU] = "Zenkaku/Hankaku", [KEY_102ND] = "102nd", [KEY_F11] = "F11", [KEY_F12] = "F12", [KEY_RO] = "RO", [KEY_KATAKANA] = "Katakana", [KEY_HIRAGANA] = "HIRAGANA", [KEY_HENKAN] = "Henkan", [KEY_KATAKANAHIRAGANA] = "Katakana/Hiragana", [KEY_MUHENKAN] = "Muhenkan", [KEY_KPJPCOMMA] = "KPJpComma", [KEY_KPENTER] = "KPEnter", [KEY_RIGHTCTRL] = "RightCtrl", [KEY_KPSLASH] = "KPSlash", [KEY_SYSRQ] = "SysRq", [KEY_RIGHTALT] = "RightAlt", [KEY_LINEFEED] = "LineFeed", [KEY_HOME] = "Home", [KEY_UP] = "Up", [KEY_PAGEUP] = "PageUp", [KEY_LEFT] = "Left", [KEY_RIGHT] = "Right", [KEY_END] = "End", [KEY_DOWN] = "Down", [KEY_PAGEDOWN] = "PageDown", [KEY_INSERT] = "Insert", [KEY_DELETE] = "Delete", [KEY_MACRO] = "Macro", [KEY_MUTE] = "Mute", [KEY_VOLUMEDOWN] = "VolumeDown", [KEY_VOLUMEUP] = "VolumeUp", [KEY_POWER] = "Power", [KEY_KPEQUAL] = "KPEqual", [KEY_KPPLUSMINUS] = "KPPlusMinus", [KEY_PAUSE] = "Pause", [KEY_KPCOMMA] = "KPComma", [KEY_HANGUEL] = "Hangeul", [KEY_HANJA] = "Hanja", [KEY_YEN] = "Yen", [KEY_LEFTMETA] = "LeftMeta", [KEY_RIGHTMETA] = "RightMeta", [KEY_COMPOSE] = "Compose", [KEY_STOP] = "Stop", [KEY_AGAIN] = "Again", [KEY_PROPS] = "Props", [KEY_UNDO] = "Undo", [KEY_FRONT] = "Front", [KEY_COPY] = "Copy", [KEY_OPEN] = "Open", [KEY_PASTE] = "Paste", [KEY_FIND] = "Find", [KEY_CUT] = "Cut", [KEY_HELP] = "Help", [KEY_MENU] = "Menu", [KEY_CALC] = "Calc", [KEY_SETUP] = "Setup", [KEY_SLEEP] = "Sleep", [KEY_WAKEUP] = "WakeUp", [KEY_FILE] = "File", [KEY_SENDFILE] = "SendFile", [KEY_DELETEFILE] = "DeleteFile", [KEY_XFER] = "X-fer", [KEY_PROG1] = "Prog1", [KEY_PROG2] = "Prog2", [KEY_WWW] = "WWW", [KEY_MSDOS] = "MSDOS", [KEY_COFFEE] = "Coffee", [KEY_ROTATE_DISPLAY] = "RotateDisplay", [KEY_CYCLEWINDOWS] = "CycleWindows", [KEY_MAIL] = "Mail", [KEY_BOOKMARKS] = "Bookmarks", [KEY_COMPUTER] = "Computer", [KEY_BACK] = "Back", [KEY_FORWARD] = "Forward", [KEY_CLOSECD] = "CloseCD", [KEY_EJECTCD] = "EjectCD", [KEY_EJECTCLOSECD] = "EjectCloseCD", [KEY_NEXTSONG] = "NextSong", [KEY_PLAYPAUSE] = "PlayPause", [KEY_PREVIOUSSONG] = "PreviousSong", [KEY_STOPCD] = "StopCD", [KEY_RECORD] = "Record", [KEY_REWIND] = "Rewind", [KEY_PHONE] = "Phone", [KEY_ISO] = "ISOKey", [KEY_CONFIG] = "Config", [KEY_HOMEPAGE] = "HomePage", [KEY_REFRESH] = "Refresh", [KEY_EXIT] = "Exit", [KEY_MOVE] = "Move", [KEY_EDIT] = "Edit", [KEY_SCROLLUP] = "ScrollUp", [KEY_SCROLLDOWN] = "ScrollDown", [KEY_KPLEFTPAREN] = "KPLeftParenthesis", [KEY_KPRIGHTPAREN] = "KPRightParenthesis", [KEY_NEW] = "New", [KEY_REDO] = "Redo", [KEY_F13] = "F13", [KEY_F14] = "F14", [KEY_F15] = "F15", [KEY_F16] = "F16", [KEY_F17] = "F17", [KEY_F18] = "F18", [KEY_F19] = "F19", [KEY_F20] = "F20", [KEY_F21] 
= "F21", [KEY_F22] = "F22", [KEY_F23] = "F23", [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD", [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", [KEY_PROG4] = "Prog4", [KEY_ALL_APPLICATIONS] = "AllApplications", [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", [KEY_CAMERA] = "Camera", [KEY_SOUND] = "Sound", [KEY_QUESTION] = "Question", [KEY_EMAIL] = "Email", [KEY_CHAT] = "Chat", [KEY_SEARCH] = "Search", [KEY_CONNECT] = "Connect", [KEY_FINANCE] = "Finance", [KEY_SPORT] = "Sport", [KEY_SHOP] = "Shop", [KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel", [KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp", [KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown", [BTN_DPAD_UP] = "BtnDPadUp", [BTN_DPAD_DOWN] = "BtnDPadDown", [BTN_DPAD_LEFT] = "BtnDPadLeft", [BTN_DPAD_RIGHT] = "BtnDPadRight", [BTN_0] = "Btn0", [BTN_1] = "Btn1", [BTN_2] = "Btn2", [BTN_3] = "Btn3", [BTN_4] = "Btn4", [BTN_5] = "Btn5", [BTN_6] = "Btn6", [BTN_7] = "Btn7", [BTN_8] = "Btn8", [BTN_9] = "Btn9", [BTN_LEFT] = "LeftBtn", [BTN_RIGHT] = "RightBtn", [BTN_MIDDLE] = "MiddleBtn", [BTN_SIDE] = "SideBtn", [BTN_EXTRA] = "ExtraBtn", [BTN_FORWARD] = "ForwardBtn", [BTN_BACK] = "BackBtn", [BTN_TASK] = "TaskBtn", [BTN_TRIGGER] = "Trigger", [BTN_THUMB] = "ThumbBtn", [BTN_THUMB2] = "ThumbBtn2", [BTN_TOP] = "TopBtn", [BTN_TOP2] = "TopBtn2", [BTN_PINKIE] = "PinkieBtn", [BTN_BASE] = "BaseBtn", [BTN_BASE2] = "BaseBtn2", [BTN_BASE3] = "BaseBtn3", [BTN_BASE4] = "BaseBtn4", [BTN_BASE5] = "BaseBtn5", [BTN_BASE6] = "BaseBtn6", [BTN_DEAD] = "BtnDead", [BTN_A] = "BtnA", [BTN_B] = "BtnB", [BTN_C] = "BtnC", [BTN_X] = "BtnX", [BTN_Y] = "BtnY", [BTN_Z] = "BtnZ", [BTN_TL] = "BtnTL", [BTN_TR] = "BtnTR", [BTN_TL2] = "BtnTL2", [BTN_TR2] = "BtnTR2", [BTN_SELECT] = "BtnSelect", [BTN_START] = "BtnStart", [BTN_MODE] = "BtnMode", [BTN_THUMBL] = "BtnThumbL", [BTN_THUMBR] = "BtnThumbR", [BTN_TOOL_PEN] = "ToolPen", [BTN_TOOL_RUBBER] = "ToolRubber", [BTN_TOOL_BRUSH] = "ToolBrush", [BTN_TOOL_PENCIL] = "ToolPencil", [BTN_TOOL_AIRBRUSH] = "ToolAirbrush", [BTN_TOOL_FINGER] = "ToolFinger", [BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens", [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", [BTN_GEAR_DOWN] = "WheelBtn", [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", [KEY_TIME] = "Time", [KEY_VENDOR] = "Vendor", [KEY_ARCHIVE] = "Archive", [KEY_PROGRAM] = "Program", [KEY_CHANNEL] = "Channel", [KEY_FAVORITES] = "Favorites", [KEY_EPG] = "EPG", [KEY_PVR] = "PVR", [KEY_MHP] = "MHP", [KEY_LANGUAGE] = "Language", [KEY_TITLE] = "Title", [KEY_SUBTITLE] = "Subtitle", [KEY_ANGLE] = "Angle", [KEY_MODE] = "Mode", [KEY_KEYBOARD] = "Keyboard", [KEY_PC] = "PC", [KEY_TV] = "TV", [KEY_TV2] = "TV2", [KEY_VCR] = "VCR", [KEY_VCR2] = "VCR2", [KEY_SAT] = "Sat", [KEY_SAT2] = "Sat2", [KEY_CD] = "CD", [KEY_TAPE] = "Tape", [KEY_RADIO] = "Radio", [KEY_TUNER] = "Tuner", [KEY_PLAYER] = "Player", [KEY_TEXT] = "Text", [KEY_DVD] = "DVD", [KEY_AUX] = "Aux", [KEY_MP3] = "MP3", [KEY_AUDIO] = "Audio", [KEY_VIDEO] = "Video", [KEY_DIRECTORY] = "Directory", [KEY_LIST] = "List", [KEY_MEMO] = "Memo", [KEY_CALENDAR] = "Calendar", [KEY_RED] = "Red", [KEY_GREEN] = "Green", 
[KEY_YELLOW] = "Yellow", [KEY_BLUE] = "Blue", [KEY_CHANNELUP] = "ChannelUp", [KEY_CHANNELDOWN] = "ChannelDown", [KEY_FIRST] = "First", [KEY_LAST] = "Last", [KEY_AB] = "AB", [KEY_NEXT] = "Next", [KEY_RESTART] = "Restart", [KEY_SLOW] = "Slow", [KEY_SHUFFLE] = "Shuffle", [KEY_BREAK] = "Break", [KEY_PREVIOUS] = "Previous", [KEY_DIGITS] = "Digits", [KEY_TEEN] = "TEEN", [KEY_TWEN] = "TWEN", [KEY_DEL_EOL] = "DeleteEOL", [KEY_DEL_EOS] = "DeleteEOS", [KEY_INS_LINE] = "InsertLine", [KEY_DEL_LINE] = "DeleteLine", [KEY_SEND] = "Send", [KEY_REPLY] = "Reply", [KEY_FORWARDMAIL] = "ForwardMail", [KEY_SAVE] = "Save", [KEY_DOCUMENTS] = "Documents", [KEY_SPELLCHECK] = "SpellCheck", [KEY_LOGOFF] = "Logoff", [KEY_FN] = "Fn", [KEY_FN_ESC] = "Fn+ESC", [KEY_FN_1] = "Fn+1", [KEY_FN_2] = "Fn+2", [KEY_FN_B] = "Fn+B", [KEY_FN_D] = "Fn+D", [KEY_FN_E] = "Fn+E", [KEY_FN_F] = "Fn+F", [KEY_FN_S] = "Fn+S", [KEY_FN_F1] = "Fn+F1", [KEY_FN_F2] = "Fn+F2", [KEY_FN_F3] = "Fn+F3", [KEY_FN_F4] = "Fn+F4", [KEY_FN_F5] = "Fn+F5", [KEY_FN_F6] = "Fn+F6", [KEY_FN_F7] = "Fn+F7", [KEY_FN_F8] = "Fn+F8", [KEY_FN_F9] = "Fn+F9", [KEY_FN_F10] = "Fn+F10", [KEY_FN_F11] = "Fn+F11", [KEY_FN_F12] = "Fn+F12", [KEY_KBDILLUMTOGGLE] = "KbdIlluminationToggle", [KEY_KBDILLUMDOWN] = "KbdIlluminationDown", [KEY_KBDILLUMUP] = "KbdIlluminationUp", [KEY_SWITCHVIDEOMODE] = "SwitchVideoMode", [KEY_BUTTONCONFIG] = "ButtonConfig", [KEY_TASKMANAGER] = "TaskManager", [KEY_JOURNAL] = "Journal", [KEY_CONTROLPANEL] = "ControlPanel", [KEY_APPSELECT] = "AppSelect", [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", [KEY_ASSISTANT] = "Assistant", [KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext", [KEY_EMOJI_PICKER] = "EmojiPicker", [KEY_CAMERA_ACCESS_ENABLE] = "CameraAccessEnable", [KEY_CAMERA_ACCESS_DISABLE] = "CameraAccessDisable", [KEY_CAMERA_ACCESS_TOGGLE] = "CameraAccessToggle", [KEY_ACCESSIBILITY] = "Accessibility", [KEY_DO_NOT_DISTURB] = "DoNotDisturb", [KEY_DICTATE] = "Dictate", [KEY_MICMUTE] = "MicrophoneMute", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", [KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev", [KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext", [KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup", [KEY_KBDINPUTASSIST_NEXTGROUP] = "KbdInputAssistNextGroup", [KEY_KBDINPUTASSIST_ACCEPT] = "KbdInputAssistAccept", [KEY_KBDINPUTASSIST_CANCEL] = "KbdInputAssistCancel", [KEY_MACRO1] = "Macro1", [KEY_MACRO2] = "Macro2", [KEY_MACRO3] = "Macro3", [KEY_MACRO4] = "Macro4", [KEY_MACRO5] = "Macro5", [KEY_MACRO6] = "Macro6", [KEY_MACRO7] = "Macro7", [KEY_MACRO8] = "Macro8", [KEY_MACRO9] = "Macro9", [KEY_MACRO10] = "Macro10", [KEY_MACRO11] = "Macro11", [KEY_MACRO12] = "Macro12", [KEY_MACRO13] = "Macro13", [KEY_MACRO14] = "Macro14", [KEY_MACRO15] = "Macro15", [KEY_MACRO16] = "Macro16", [KEY_MACRO17] = "Macro17", [KEY_MACRO18] = "Macro18", [KEY_MACRO19] = "Macro19", [KEY_MACRO20] = "Macro20", [KEY_MACRO21] = "Macro21", [KEY_MACRO22] = "Macro22", [KEY_MACRO23] = "Macro23", [KEY_MACRO24] = "Macro24", [KEY_MACRO25] = "Macro25", [KEY_MACRO26] = "Macro26", [KEY_MACRO27] = "Macro27", [KEY_MACRO28] = "Macro28", [KEY_MACRO29] = "Macro29", [KEY_MACRO30] = "Macro30", [BTN_TRIGGER_HAPPY1] = "TriggerHappy1", [BTN_TRIGGER_HAPPY2] = "TriggerHappy2", [BTN_TRIGGER_HAPPY3] = "TriggerHappy3", [BTN_TRIGGER_HAPPY4] = "TriggerHappy4", [BTN_TRIGGER_HAPPY5] = "TriggerHappy5", [BTN_TRIGGER_HAPPY6] = "TriggerHappy6", [BTN_TRIGGER_HAPPY7] = "TriggerHappy7", [BTN_TRIGGER_HAPPY8] = "TriggerHappy8", 
[BTN_TRIGGER_HAPPY9] = "TriggerHappy9", [BTN_TRIGGER_HAPPY10] = "TriggerHappy10", [BTN_TRIGGER_HAPPY11] = "TriggerHappy11", [BTN_TRIGGER_HAPPY12] = "TriggerHappy12", [BTN_TRIGGER_HAPPY13] = "TriggerHappy13", [BTN_TRIGGER_HAPPY14] = "TriggerHappy14", [BTN_TRIGGER_HAPPY15] = "TriggerHappy15", [BTN_TRIGGER_HAPPY16] = "TriggerHappy16", [BTN_TRIGGER_HAPPY17] = "TriggerHappy17", [BTN_TRIGGER_HAPPY18] = "TriggerHappy18", [BTN_TRIGGER_HAPPY19] = "TriggerHappy19", [BTN_TRIGGER_HAPPY20] = "TriggerHappy20", [BTN_TRIGGER_HAPPY21] = "TriggerHappy21", [BTN_TRIGGER_HAPPY22] = "TriggerHappy22", [BTN_TRIGGER_HAPPY23] = "TriggerHappy23", [BTN_TRIGGER_HAPPY24] = "TriggerHappy24", [BTN_TRIGGER_HAPPY25] = "TriggerHappy25", [BTN_TRIGGER_HAPPY26] = "TriggerHappy26", [BTN_TRIGGER_HAPPY27] = "TriggerHappy27", [BTN_TRIGGER_HAPPY28] = "TriggerHappy28", [BTN_TRIGGER_HAPPY29] = "TriggerHappy29", [BTN_TRIGGER_HAPPY30] = "TriggerHappy30", [BTN_TRIGGER_HAPPY31] = "TriggerHappy31", [BTN_TRIGGER_HAPPY32] = "TriggerHappy32", [BTN_TRIGGER_HAPPY33] = "TriggerHappy33", [BTN_TRIGGER_HAPPY34] = "TriggerHappy34", [BTN_TRIGGER_HAPPY35] = "TriggerHappy35", [BTN_TRIGGER_HAPPY36] = "TriggerHappy36", [BTN_TRIGGER_HAPPY37] = "TriggerHappy37", [BTN_TRIGGER_HAPPY38] = "TriggerHappy38", [BTN_TRIGGER_HAPPY39] = "TriggerHappy39", [BTN_TRIGGER_HAPPY40] = "TriggerHappy40", [BTN_STYLUS3] = "Stylus3", [BTN_TOOL_QUINTTAP] = "ToolQuintTap", [KEY_10CHANNELSDOWN] = "10ChannelsDown", [KEY_10CHANNELSUP] = "10ChannelsUp", [KEY_3D_MODE] = "3DMode", [KEY_ADDRESSBOOK] = "Addressbook", [KEY_ALS_TOGGLE] = "ALSToggle", [KEY_ASPECT_RATIO] = "AspectRatio", [KEY_ATTENDANT_OFF] = "AttendantOff", [KEY_ATTENDANT_ON] = "AttendantOn", [KEY_ATTENDANT_TOGGLE] = "AttendantToggle", [KEY_AUDIO_DESC] = "AudioDesc", [KEY_AUTOPILOT_ENGAGE_TOGGLE] = "AutoPiloteEngage", [KEY_BATTERY] = "Battery", [KEY_BLUETOOTH] = "BlueTooth", [KEY_BRIGHTNESS_CYCLE] = "BrightnessCycle", [KEY_BRIGHTNESS_MENU] = "BrightnessMenu", [KEY_BRL_DOT1] = "BrlDot1", [KEY_BRL_DOT10] = "BrlDot10", [KEY_BRL_DOT2] = "BrlDot2", [KEY_BRL_DOT3] = "BrlDot3", [KEY_BRL_DOT4] = "BrlDot4", [KEY_BRL_DOT5] = "BrlDot5", [KEY_BRL_DOT6] = "BrlDot6", [KEY_BRL_DOT7] = "BrlDot7", [KEY_BRL_DOT8] = "BrlDot8", [KEY_BRL_DOT9] = "BrlDot9", [KEY_CAMERA_DOWN] = "CameraDown", [KEY_CAMERA_FOCUS] = "CameraFocus", [KEY_CAMERA_LEFT] = "CameraLeft", [KEY_CAMERA_RIGHT] = "CameraRight", [KEY_CAMERA_UP] = "CameraUp", [KEY_CAMERA_ZOOMIN] = "CameraZoomIn", [KEY_CAMERA_ZOOMOUT] = "CameraZoomOut", [KEY_CLEARVU_SONAR] = "ClearVUSonar", [KEY_CONTEXT_MENU] = "ContextMenu", [KEY_DATA] = "Data", [KEY_DATABASE] = "DataBase", [KEY_DISPLAY_OFF] = "DisplayOff", [KEY_DISPLAYTOGGLE] = "DisplayToggle", [KEY_DOLLAR] = "Dollar", [KEY_DUAL_RANGE_RADAR] = "DualRangeRadat", [KEY_EDITOR] = "Editor", [KEY_EURO] = "Euro", [KEY_FASTREVERSE] = "FastReverse", [KEY_FISHING_CHART] = "FishingChart", [KEY_FN_RIGHT_SHIFT] = "FnRightShift", [KEY_FRAMEBACK] = "FrameBack", [KEY_FRAMEFORWARD] = "FrameForward", [KEY_FULL_SCREEN] = "FullScreen", [KEY_GAMES] = "Games", [KEY_GRAPHICSEDITOR] = "GraphicsEditor", [KEY_HANGUP_PHONE] = "HangUpPhone", [KEY_IMAGES] = "Images", [KEY_KBD_LCD_MENU1] = "KbdLcdMenu1", [KEY_KBD_LCD_MENU2] = "KbdLcdMenu2", [KEY_KBD_LCD_MENU3] = "KbdLcdMenu3", [KEY_KBD_LCD_MENU4] = "KbdLcdMenu4", [KEY_KBD_LCD_MENU5] = "KbdLcdMenu5", [KEY_LEFT_DOWN] = "LeftDown", [KEY_LEFT_UP] = "LeftUp", [KEY_LIGHTS_TOGGLE] = "LightToggle", [KEY_MACRO_PRESET1] = "MacroPreset1", [KEY_MACRO_PRESET2] = "MacroPreset2", [KEY_MACRO_PRESET3] = "MacroPrest3", 
[KEY_MACRO_PRESET_CYCLE] = "MacroPresetCycle", [KEY_MACRO_RECORD_START] = "MacroRecordStart", [KEY_MACRO_RECORD_STOP] = "MacroRecordStop", [KEY_MARK_WAYPOINT] = "MarkWayPoint", [KEY_MEDIA_REPEAT] = "MediaRepeat", [KEY_MEDIA_TOP_MENU] = "MediaTopMenu", [KEY_MESSENGER] = "Messenger", [KEY_NAV_CHART] = "NavChar", [KEY_NAV_INFO] = "NavInfo", [KEY_NEWS] = "News", [KEY_NEXT_ELEMENT] = "NextElement", [KEY_NEXT_FAVORITE] = "NextFavorite", [KEY_NOTIFICATION_CENTER] = "NotificationCenter", [KEY_NUMERIC_0] = "Numeric0", [KEY_NUMERIC_1] = "Numeric1", [KEY_NUMERIC_11] = "Numceric11", [KEY_NUMERIC_12] = "Numeric12", [KEY_NUMERIC_2] = "Numeric2", [KEY_NUMERIC_3] = "Numeric3", [KEY_NUMERIC_4] = "Numeric4", [KEY_NUMERIC_5] = "Numeric5", [KEY_NUMERIC_6] = "Numeric6", [KEY_NUMERIC_7] = "Numeric7", [KEY_NUMERIC_8] = "Numeric8", [KEY_NUMERIC_9] = "Numeric9", [KEY_NUMERIC_A] = "NumericA", [KEY_NUMERIC_B] = "NumericB", [KEY_NUMERIC_C] = "NumericC", [KEY_NUMERIC_D] = "NumericD", [KEY_NUMERIC_POUND] = "NumericPound", [KEY_NUMERIC_STAR] = "NumericStar", [KEY_ONSCREEN_KEYBOARD] = "OnScreenKeyBoard", [KEY_PAUSE_RECORD] = "PauseRecord", [KEY_PICKUP_PHONE] = "PickUpPhone", [KEY_PRESENTATION] = "Presentation", [KEY_PREVIOUS_ELEMENT] = "PreviousElement", [KEY_PRIVACY_SCREEN_TOGGLE] = "PrivacyScreenToggle", [KEY_RADAR_OVERLAY] = "RadarOverLay", [KEY_RFKILL] = "RFKill", [KEY_RIGHT_DOWN] = "RightDown", [KEY_RIGHT_UP] = "RightUp", [KEY_ROOT_MENU] = "RootMenu", [KEY_ROTATE_LOCK_TOGGLE] = "RotateLockToggle", [KEY_SCALE] = "Scale", [KEY_SELECTIVE_SCREENSHOT] = "SelectiveScreenshot", [KEY_SIDEVU_SONAR] = "SideVUSonar", [KEY_SINGLE_RANGE_RADAR] = "SingleRangeRadar", [KEY_SLOWREVERSE] = "SlowReverse", [KEY_SOS] = "SOS", [KEY_SPREADSHEET] = "SpreadSheet", [KEY_STOP_RECORD] = "StopRecord", [KEY_TOUCHPAD_OFF] = "TouchPadOff", [KEY_TOUCHPAD_ON] = "TouchPadOn", [KEY_TOUCHPAD_TOGGLE] = "TouchPadToggle", [KEY_TRADITIONAL_SONAR] = "TraditionalSonar", [KEY_UNMUTE] = "Unmute", [KEY_UWB] = "UWB", [KEY_VIDEO_NEXT] = "VideoNext", [KEY_VIDEOPHONE] = "VideoPhone", [KEY_VIDEO_PREV] = "VideoPrev", [KEY_VOD] = "VOD", [KEY_VOICEMAIL] = "VoiceMail", [KEY_WLAN] = "WLAN", [KEY_WORDPROCESSOR] = "WordProcessor", [KEY_WPS_BUTTON] = "WPSButton", [KEY_WWAN] = "WWAN", [KEY_ZOOMIN] = "ZoomIn", [KEY_ZOOMOUT] = "ZoomOut", [KEY_ZOOMRESET] = "ZoomReset", }; static const char *relatives[REL_MAX + 1] = { [REL_X] = "X", [REL_Y] = "Y", [REL_Z] = "Z", [REL_RX] = "Rx", [REL_RY] = "Ry", [REL_RZ] = "Rz", [REL_HWHEEL] = "HWheel", [REL_DIAL] = "Dial", [REL_WHEEL] = "Wheel", [REL_MISC] = "Misc", [REL_WHEEL_HI_RES] = "WheelHiRes", [REL_HWHEEL_HI_RES] = "HWheelHiRes" }; static const char *absolutes[ABS_CNT] = { [ABS_X] = "X", [ABS_Y] = "Y", [ABS_Z] = "Z", [ABS_RX] = "Rx", [ABS_RY] = "Ry", [ABS_RZ] = "Rz", [ABS_THROTTLE] = "Throttle", [ABS_RUDDER] = "Rudder", [ABS_WHEEL] = "Wheel", [ABS_GAS] = "Gas", [ABS_BRAKE] = "Brake", [ABS_HAT0X] = "Hat0X", [ABS_HAT0Y] = "Hat0Y", [ABS_HAT1X] = "Hat1X", [ABS_HAT1Y] = "Hat1Y", [ABS_HAT2X] = "Hat2X", [ABS_HAT2Y] = "Hat2Y", [ABS_HAT3X] = "Hat3X", [ABS_HAT3Y] = "Hat 3Y", [ABS_PRESSURE] = "Pressure", [ABS_DISTANCE] = "Distance", [ABS_TILT_X] = "XTilt", [ABS_TILT_Y] = "YTilt", [ABS_TOOL_WIDTH] = "ToolWidth", [ABS_VOLUME] = "Volume", [ABS_PROFILE] = "Profile", [ABS_MISC] = "Misc", [ABS_MT_SLOT] = "MTSlot", [ABS_MT_TOUCH_MAJOR] = "MTMajor", [ABS_MT_TOUCH_MINOR] = "MTMinor", [ABS_MT_WIDTH_MAJOR] = "MTMajorW", [ABS_MT_WIDTH_MINOR] = "MTMinorW", [ABS_MT_ORIENTATION] = "MTOrientation", [ABS_MT_POSITION_X] = "MTPositionX", [ABS_MT_POSITION_Y] = 
"MTPositionY", [ABS_MT_TOOL_TYPE] = "MTToolType", [ABS_MT_BLOB_ID] = "MTBlobID", [ABS_MT_TRACKING_ID] = "MTTrackingID", [ABS_MT_PRESSURE] = "MTPressure", [ABS_MT_DISTANCE] = "MTDistance", [ABS_MT_TOOL_X] = "MTToolX", [ABS_MT_TOOL_Y] = "MTToolY", }; static const char *misc[MSC_MAX + 1] = { [MSC_SERIAL] = "Serial", [MSC_PULSELED] = "Pulseled", [MSC_GESTURE] = "Gesture", [MSC_RAW] = "RawData", [MSC_SCAN] = "Scan", [MSC_TIMESTAMP] = "TimeStamp", }; static const char *leds[LED_MAX + 1] = { [LED_NUML] = "NumLock", [LED_CAPSL] = "CapsLock", [LED_SCROLLL] = "ScrollLock", [LED_COMPOSE] = "Compose", [LED_KANA] = "Kana", [LED_SLEEP] = "Sleep", [LED_SUSPEND] = "Suspend", [LED_MUTE] = "Mute", [LED_MISC] = "Misc", [LED_MAIL] = "Mail", [LED_CHARGING] = "Charging", }; static const char *repeats[REP_MAX + 1] = { [REP_DELAY] = "Delay", [REP_PERIOD] = "Period" }; static const char *sounds[SND_MAX + 1] = { [SND_CLICK] = "Click", [SND_BELL] = "Bell", [SND_TONE] = "Tone" }; static const char *software[SW_CNT] = { [SW_LID] = "Lid", [SW_TABLET_MODE] = "TabletMode", [SW_HEADPHONE_INSERT] = "HeadPhoneInsert", [SW_RFKILL_ALL] = "RFKillAll", [SW_MICROPHONE_INSERT] = "MicrophoneInsert", [SW_DOCK] = "Dock", [SW_LINEOUT_INSERT] = "LineOutInsert", [SW_JACK_PHYSICAL_INSERT] = "JackPhysicalInsert", [SW_VIDEOOUT_INSERT] = "VideoOutInsert", [SW_CAMERA_LENS_COVER] = "CameraLensCover", [SW_KEYPAD_SLIDE] = "KeyPadSlide", [SW_FRONT_PROXIMITY] = "FrontProximity", [SW_ROTATE_LOCK] = "RotateLock", [SW_LINEIN_INSERT] = "LineInInsert", [SW_MUTE_DEVICE] = "MuteDevice", [SW_PEN_INSERTED] = "PenInserted", [SW_MACHINE_COVER] = "MachineCover", }; static const char *force[FF_CNT] = { [FF_RUMBLE] = "FF_RUMBLE", [FF_PERIODIC] = "FF_PERIODIC", [FF_CONSTANT] = "FF_CONSTANT", [FF_SPRING] = "FF_SPRING", [FF_FRICTION] = "FF_FRICTION", [FF_DAMPER] = "FF_DAMPER", [FF_INERTIA] = "FF_INERTIA", [FF_RAMP] = "FF_RAMP", [FF_SQUARE] = "FF_SQUARE", [FF_TRIANGLE] = "FF_TRIANGLE", [FF_SINE] = "FF_SINE", [FF_SAW_UP] = "FF_SAW_UP", [FF_SAW_DOWN] = "FF_SAW_DOWN", [FF_CUSTOM] = "FF_CUSTOM", [FF_GAIN] = "FF_GAIN", [FF_AUTOCENTER] = "FF_AUTOCENTER", [FF_MAX] = "FF_MAX", }; static const char *force_status[FF_STATUS_MAX + 1] = { [FF_STATUS_STOPPED] = "FF_STATUS_STOPPED", [FF_STATUS_PLAYING] = "FF_STATUS_PLAYING", }; static const char **names[EV_MAX + 1] = { [EV_SYN] = syncs, [EV_KEY] = keys, [EV_REL] = relatives, [EV_ABS] = absolutes, [EV_MSC] = misc, [EV_LED] = leds, [EV_SND] = sounds, [EV_REP] = repeats, [EV_SW] = software, [EV_FF] = force, [EV_FF_STATUS] = force_status, }; static void hid_resolv_event(__u8 type, __u16 code, struct seq_file *f) { if (events[type]) seq_printf(f, "%s.", events[type]); else seq_printf(f, "%02x.", type); if (names[type] && names[type][code]) seq_printf(f, "%s", names[type][code]); else seq_printf(f, "%04x", code); } static void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) { int i, j, k; struct hid_report *report; struct hid_usage *usage; for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { list_for_each_entry(report, &hid->report_enum[k].report_list, list) { for (i = 0; i < report->maxfield; i++) { for ( j = 0; j < report->field[i]->maxusage; j++) { usage = report->field[i]->usage + j; hid_resolv_usage(usage->hid, f); seq_printf(f, " ---> "); hid_resolv_event(usage->type, usage->code, f); seq_printf(f, "\n"); } } } } } static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; const __u8 *rdesc = hdev->rdesc; unsigned rsize = hdev->rsize; int i; if (!rdesc) { rdesc = 
hdev->dev_rdesc; rsize = hdev->dev_rsize; } /* dump HID report descriptor */ for (i = 0; i < rsize; i++) seq_printf(f, "%02x ", rdesc[i]); seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ if (down_interruptible(&hdev->driver_input_lock)) return 0; hid_dump_device(hdev, f); seq_printf(f, "\n"); hid_dump_input_mapping(hdev, f); up(&hdev->driver_input_lock); return 0; } static int hid_debug_events_open(struct inode *inode, struct file *file) { int err = 0; struct hid_debug_list *list; unsigned long flags; if (!(list = kzalloc(sizeof(struct hid_debug_list), GFP_KERNEL))) { err = -ENOMEM; goto out; } err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); if (err) { kfree(list); goto out; } list->hdev = (struct hid_device *) inode->i_private; kref_get(&list->hdev->ref); file->private_data = list; mutex_init(&list->read_mutex); spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_add_tail(&list->node, &list->hdev->debug_list); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); out: return err; } static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); if (kfifo_is_empty(&list->hid_debug_fifo)) { add_wait_queue(&list->hdev->debug_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); while (kfifo_is_empty(&list->hid_debug_fifo)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } /* if list->hdev is NULL we cannot remove_wait_queue(). * if list->hdev->debug is 0 then hid_debug_unregister() * was already called and list->hdev is being destroyed. * if we add remove_wait_queue() here we can hit a race. */ if (!list->hdev || !list->hdev->debug) { ret = -EIO; set_current_state(TASK_RUNNING); goto out; } if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; break; } /* allow O_NONBLOCK from other threads */ mutex_unlock(&list->read_mutex); schedule(); mutex_lock(&list->read_mutex); set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); remove_wait_queue(&list->hdev->debug_wait, &wait); if (ret) goto out; } /* pass the fifo content to userspace, locking is not needed with only * one concurrent reader and one concurrent writer */ ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); if (ret) goto out; ret = copied; out: mutex_unlock(&list->read_mutex); return ret; } static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) { struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; return 0; } static int hid_debug_events_release(struct inode *inode, struct file *file) { struct hid_debug_list *list = file->private_data; unsigned long flags; spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); kfifo_free(&list->hid_debug_fifo); kref_put(&list->hdev->ref, hiddev_free); kfree(list); return 0; } DEFINE_SHOW_ATTRIBUTE(hid_debug_rdesc); static const struct file_operations hid_debug_events_fops = { .owner = THIS_MODULE, .open = hid_debug_events_open, .read = hid_debug_events_read, .poll = hid_debug_events_poll, .release = hid_debug_events_release, .llseek = noop_llseek, }; void hid_debug_register(struct hid_device *hdev, const char *name) { hdev->debug_dir = 
debugfs_create_dir(name, hid_debug_root); hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, hdev->debug_dir, hdev, &hid_debug_rdesc_fops); hdev->debug_events = debugfs_create_file("events", 0400, hdev->debug_dir, hdev, &hid_debug_events_fops); hdev->debug = 1; } void hid_debug_unregister(struct hid_device *hdev) { hdev->debug = 0; wake_up_interruptible(&hdev->debug_wait); debugfs_remove(hdev->debug_rdesc); debugfs_remove(hdev->debug_events); debugfs_remove(hdev->debug_dir); } void hid_debug_init(void) { hid_debug_root = debugfs_create_dir("hid", NULL); } void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); }
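The "events" node registered above is an ordinary debugfs file, so a userspace consumer only needs open/poll/read. The following standalone sketch is not part of the kernel sources; the device directory name is a made-up example, since the real name is whatever was passed to hid_debug_register().

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical device directory under the "hid" debugfs root */
	const char *path = "/sys/kernel/debug/hid/0003:046D:C077.0001/events";
	char buf[4096];
	ssize_t n;

	int fd = open(path, O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	for (;;) {
		if (poll(&pfd, 1, -1) < 0)
			break;
		/* hid_debug_events_poll() reports ERR|HUP once the device is gone */
		if (pfd.revents & (POLLERR | POLLHUP))
			break;
		n = read(fd, buf, sizeof(buf));
		if (n > 0)
			fwrite(buf, 1, (size_t)n, stdout);
	}
	close(fd);
	return 0;
}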
// SPDX-License-Identifier: GPL-2.0
/*
 * Floating proportions with flexible aging period
 *
 *  Copyright (C) 2011, SUSE, Jan Kara <jack@suse.cz>
 *
 * The goal of this code is: Given different types of event, measure proportion
 * of each type of event over time. The proportions are measured with
 * exponentially decaying history to give smooth transitions. A formula
 * expressing proportion of event of type 'j' is:
 *
 *   p_{j} = (\Sum_{i>=0} x_{i,j}/2^{i+1})/(\Sum_{i>=0} x_i/2^{i+1})
 *
 * Where x_{i,j} is j's number of events in i-th last time period and x_i is
 * total number of events in i-th last time period.
 *
 * Note that p_{j}'s are normalised, i.e.
 *
 *   \Sum_{j} p_{j} = 1,
 *
 * This formula can be straightforwardly computed by maintaining denominator
 * (let's call it 'd') and for each event type its numerator (let's call it
 * 'n_j'). When an event of type 'j' happens, we simply need to do:
 *   n_j++; d++;
 *
 * When a new period is declared, we could do:
 *   d /= 2
 *   for each j
 *     n_j /= 2
 *
 * To avoid iteration over all event types, we instead shift the numerator of
 * event j lazily when someone asks for a proportion of event j or when event
 * j occurs. This can be trivially implemented by remembering the last period
 * in which something happened with proportion of type j.
 */
#include <linux/flex_proportions.h>

int fprop_global_init(struct fprop_global *p, gfp_t gfp)
{
	int err;

	p->period = 0;
	/* Use 1 to avoid dealing with periods with 0 events... */
	err = percpu_counter_init(&p->events, 1, gfp);
	if (err)
		return err;
	seqcount_init(&p->sequence);
	return 0;
}

void fprop_global_destroy(struct fprop_global *p)
{
	percpu_counter_destroy(&p->events);
}

/*
 * Declare @periods new periods. It is up to the caller to make sure period
 * transitions cannot happen in parallel.
 *
 * The function returns true if the proportions are still defined and false
 * if aging zeroed out all events. This can be used to detect whether declaring
 * further periods has any effect.
 */
bool fprop_new_period(struct fprop_global *p, int periods)
{
	s64 events = percpu_counter_sum(&p->events);

	/*
	 * Don't do anything if there are no events.
*/ if (events <= 1) return false; preempt_disable_nested(); write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; /* Use addition to avoid losing events happening between sum and set */ percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); preempt_enable_nested(); return true; } /* * ---- PERCPU ---- */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; raw_spin_lock_init(&pl->lock); return 0; } void fprop_local_destroy_percpu(struct fprop_local_percpu *pl) { percpu_counter_destroy(&pl->events); } static void fprop_reflect_period_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) { unsigned int period = p->period; unsigned long flags; /* Fast path - period didn't change */ if (pl->period == period) return; raw_spin_lock_irqsave(&pl->lock, flags); /* Someone updated pl->period while we were spinning? */ if (pl->period >= period) { raw_spin_unlock_irqrestore(&pl->lock, flags); return; } /* Aging zeroed our fraction? */ if (period - pl->period < BITS_PER_LONG) { s64 val = percpu_counter_read(&pl->events); if (val < (nr_cpu_ids * PROP_BATCH)) val = percpu_counter_sum(&pl->events); percpu_counter_add_batch(&pl->events, -val + (val >> (period-pl->period)), PROP_BATCH); } else percpu_counter_set(&pl->events, 0); pl->period = period; raw_spin_unlock_irqrestore(&pl->lock, flags); } /* Event of type pl happened */ void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, long nr) { fprop_reflect_period_percpu(p, pl); percpu_counter_add_batch(&pl->events, nr, PROP_BATCH); percpu_counter_add(&p->events, nr); } void fprop_fraction_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, unsigned long *numerator, unsigned long *denominator) { unsigned int seq; s64 num, den; do { seq = read_seqcount_begin(&p->sequence); fprop_reflect_period_percpu(p, pl); num = percpu_counter_read_positive(&pl->events); den = percpu_counter_read_positive(&p->events); } while (read_seqcount_retry(&p->sequence, seq)); /* * Make fraction <= 1 and denominator > 0 even in presence of percpu * counter errors */ if (den <= num) { if (num) den = num; else den = 1; } *denominator = den; *numerator = num; } /* * Like __fprop_add_percpu() except that event is counted only if the given * type has fraction smaller than @max_frac/FPROP_FRAC_BASE */ void __fprop_add_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, int max_frac, long nr) { if (unlikely(max_frac < FPROP_FRAC_BASE)) { unsigned long numerator, denominator; s64 tmp; fprop_fraction_percpu(p, pl, &numerator, &denominator); /* Adding 'nr' to fraction exceeds max_frac/FPROP_FRAC_BASE? */ tmp = (u64)denominator * max_frac - ((u64)numerator << FPROP_FRAC_SHIFT); if (tmp < 0) { /* Maximum fraction already exceeded? */ return; } else if (tmp < nr * (FPROP_FRAC_BASE - max_frac)) { /* Add just enough for the fraction to saturate */ nr = div_u64(tmp + FPROP_FRAC_BASE - max_frac - 1, FPROP_FRAC_BASE - max_frac); } } __fprop_add_percpu(p, pl, nr); }
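The scheme described in the file's header comment can be exercised outside the kernel. Below is a minimal standalone C model, with invented toy_* names, of the same idea: a shared denominator aged on every period, and per-type numerators shifted lazily by however many periods they missed, just as fprop_reflect_period_percpu() catches up using the pl->period stamp.

#include <stdio.h>

struct toy_global { unsigned long events; unsigned int period; };
struct toy_local  { unsigned long events; unsigned int period; };

static void toy_reflect_period(struct toy_global *g, struct toy_local *l)
{
	unsigned int missed = g->period - l->period;

	/* catch up lazily: halve once per period this type slept through */
	if (missed < sizeof(unsigned long) * 8)
		l->events >>= missed;
	else
		l->events = 0;
	l->period = g->period;
}

static void toy_event(struct toy_global *g, struct toy_local *l)
{
	toy_reflect_period(g, l);
	l->events++;		/* n_j++ */
	g->events++;		/* d++ */
}

static void toy_new_period(struct toy_global *g)
{
	g->events -= g->events >> 1;	/* d -= d/2, like fprop_new_period() */
	g->period++;
}

int main(void)
{
	struct toy_global g = { .events = 1 };	/* 1 avoids a zero denominator */
	struct toy_local a = { 0 }, b = { 0 };
	int i;

	for (i = 0; i < 30; i++)
		toy_event(&g, &a);	/* old burst of type 'a' events */
	toy_new_period(&g);
	for (i = 0; i < 10; i++)
		toy_event(&g, &b);	/* newer type 'b' events */

	toy_reflect_period(&g, &a);
	/* prints p_a ~ 15/26, p_b ~ 10/26: the old burst has been aged */
	printf("p_a ~ %lu/%lu, p_b ~ %lu/%lu\n",
	       a.events, g.events, b.events, g.events);
	return 0;
}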
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_health.h"

STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *);

/*
 * Add the locked dquot to the transaction.
 * The dquot must be locked, and it cannot be associated with any
 * transaction.
 */
void
xfs_trans_dqjoin(
	struct xfs_trans	*tp,
	struct xfs_dquot	*dqp)
{
	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(dqp->q_logitem.qli_dquot == dqp);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
}

/*
 * This is called to mark the dquot as needing to be logged when the
 * transaction is committed. The dquot must already be associated with the
 * given transaction.
 * Note that it marks the entire transaction as dirty. In the ordinary case,
 * this gets called via xfs_trans_commit, after the transaction is already
 * dirty. However, there's nothing to stop this from getting called directly,
 * as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY flag.
 */
void
xfs_trans_log_dquot(
	struct xfs_trans	*tp,
	struct xfs_dquot	*dqp)
{
	ASSERT(XFS_DQ_IS_LOCKED(dqp));

	/* Upgrade the dquot to bigtime format if possible.
*/ if (dqp->q_id != 0 && xfs_has_bigtime(tp->t_mountp) && !(dqp->q_type & XFS_DQTYPE_BIGTIME)) dqp->q_type |= XFS_DQTYPE_BIGTIME; tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &dqp->q_logitem.qli_item.li_flags); } /* * Carry forward whatever is left of the quota blk reservation to * the spanky new transaction */ void xfs_trans_dup_dqinfo( struct xfs_trans *otp, struct xfs_trans *ntp) { struct xfs_dqtrx *oq, *nq; int i, j; struct xfs_dqtrx *oqa, *nqa; uint64_t blk_res_used; if (!otp->t_dqinfo) return; xfs_trans_alloc_dqinfo(ntp); for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { oqa = otp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j]; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { blk_res_used = 0; if (oqa[i].qt_dquot == NULL) break; oq = &oqa[i]; nq = &nqa[i]; if (oq->qt_blk_res && oq->qt_bcount_delta > 0) blk_res_used = oq->qt_bcount_delta; nq->qt_dquot = oq->qt_dquot; nq->qt_bcount_delta = nq->qt_icount_delta = 0; nq->qt_rtbcount_delta = 0; /* * Transfer whatever is left of the reservations. */ nq->qt_blk_res = oq->qt_blk_res - blk_res_used; oq->qt_blk_res = blk_res_used; nq->qt_rtblk_res = oq->qt_rtblk_res - oq->qt_rtblk_res_used; oq->qt_rtblk_res = oq->qt_rtblk_res_used; nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; oq->qt_ino_res = oq->qt_ino_res_used; } } } #ifdef CONFIG_XFS_LIVE_HOOKS /* * Use a static key here to reduce the overhead of quota live updates. If the * compiler supports jump labels, the static branch will be replaced by a nop * sled when there are no hook users. Online fsck is currently the only * caller, so this is a reasonable tradeoff. * * Note: Patching the kernel code requires taking the cpu hotplug lock. Other * parts of the kernel allocate memory with that lock held, which means that * XFS callers cannot hold any locks that might be used by memory reclaim or * writeback when calling the static_branch_{inc,dec} functions. */ DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dqtrx_hooks_switch); void xfs_dqtrx_hook_disable(void) { xfs_hooks_switch_off(&xfs_dqtrx_hooks_switch); } void xfs_dqtrx_hook_enable(void) { xfs_hooks_switch_on(&xfs_dqtrx_hooks_switch); } /* Schedule a transactional dquot update on behalf of an inode. */ void xfs_trans_mod_ino_dquot( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_dquot *dqp, unsigned int field, int64_t delta) { if (xfs_is_metadir_inode(ip)) return; xfs_trans_mod_dquot(tp, dqp, field, delta); if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { struct xfs_mod_ino_dqtrx_params p = { .tx_id = (uintptr_t)tp, .ino = ip->i_ino, .q_type = xfs_dquot_type(dqp), .q_id = dqp->q_id, .delta = delta }; struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; xfs_hooks_call(&qi->qi_mod_ino_dqtrx_hooks, field, &p); } } /* Call the specified functions during a dquot counter update. */ int xfs_dqtrx_hook_add( struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook) { int error; /* * Transactional dquot updates first call the mod hook when changes * are attached to the transaction and then call the apply hook when * those changes are committed (or canceled). * * The apply hook must be installed before the mod hook so that we * never fail to catch the end of a quota update sequence. */ error = xfs_hooks_add(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); if (error) goto out; error = xfs_hooks_add(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook); if (error) goto out_apply; return 0; out_apply: xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); out: return error; } /* Stop calling the specified function during a dquot counter update. 
*/ void xfs_dqtrx_hook_del( struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook) { /* * The mod hook must be removed before apply hook to avoid giving the * hook consumer with an incomplete update. No hooks should be running * after these functions return. */ xfs_hooks_del(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook); xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); } /* Configure dquot update hook functions. */ void xfs_dqtrx_hook_setup( struct xfs_dqtrx_hook *hook, notifier_fn_t mod_fn, notifier_fn_t apply_fn) { xfs_hook_setup(&hook->mod_hook, mod_fn); xfs_hook_setup(&hook->apply_hook, apply_fn); } #endif /* CONFIG_XFS_LIVE_HOOKS */ /* * Wrap around mod_dquot to account for both user and group quotas. */ void xfs_trans_mod_dquot_byino( xfs_trans_t *tp, xfs_inode_t *ip, uint field, int64_t delta) { xfs_mount_t *mp = tp->t_mountp; if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino) || xfs_is_metadir_inode(ip)) return; if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_gdquot, field, delta); if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_pdquot, field, delta); } STATIC struct xfs_dqtrx * xfs_trans_get_dqtrx( struct xfs_trans *tp, struct xfs_dquot *dqp) { int i; struct xfs_dqtrx *qa; switch (xfs_dquot_type(dqp)) { case XFS_DQTYPE_USER: qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; break; case XFS_DQTYPE_GROUP: qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; break; case XFS_DQTYPE_PROJ: qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_PRJ]; break; default: return NULL; } for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { if (qa[i].qt_dquot == NULL || qa[i].qt_dquot == dqp) return &qa[i]; } return NULL; } /* * Make the changes in the transaction structure. * The moral equivalent to xfs_trans_mod_sb(). * We don't touch any fields in the dquot, so we don't care * if it's locked or not (most of the time it won't be). */ void xfs_trans_mod_dquot( struct xfs_trans *tp, struct xfs_dquot *dqp, uint field, int64_t delta) { struct xfs_dqtrx *qtrx; ASSERT(tp); ASSERT(XFS_IS_QUOTA_ON(tp->t_mountp)); qtrx = NULL; if (!delta) return; if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); /* * Find either the first free slot or the slot that belongs * to this dquot. */ qtrx = xfs_trans_get_dqtrx(tp, dqp); ASSERT(qtrx); if (qtrx->qt_dquot == NULL) qtrx->qt_dquot = dqp; trace_xfs_trans_mod_dquot_before(qtrx); trace_xfs_trans_mod_dquot(tp, dqp, field, delta); switch (field) { /* regular disk blk reservation */ case XFS_TRANS_DQ_RES_BLKS: qtrx->qt_blk_res += delta; break; /* inode reservation */ case XFS_TRANS_DQ_RES_INOS: qtrx->qt_ino_res += delta; break; /* disk blocks used. 
*/ case XFS_TRANS_DQ_BCOUNT: qtrx->qt_bcount_delta += delta; break; case XFS_TRANS_DQ_DELBCOUNT: qtrx->qt_delbcnt_delta += delta; break; /* Inode Count */ case XFS_TRANS_DQ_ICOUNT: if (qtrx->qt_ino_res && delta > 0) { qtrx->qt_ino_res_used += delta; ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); } qtrx->qt_icount_delta += delta; break; /* rtblk reservation */ case XFS_TRANS_DQ_RES_RTBLKS: qtrx->qt_rtblk_res += delta; break; /* rtblk count */ case XFS_TRANS_DQ_RTBCOUNT: if (qtrx->qt_rtblk_res && delta > 0) { qtrx->qt_rtblk_res_used += delta; ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); } qtrx->qt_rtbcount_delta += delta; break; case XFS_TRANS_DQ_DELRTBCOUNT: qtrx->qt_delrtb_delta += delta; break; default: ASSERT(0); } trace_xfs_trans_mod_dquot_after(qtrx); } /* * Given an array of dqtrx structures, lock all the dquots associated and join * them to the transaction, provided they have been modified. */ STATIC void xfs_trans_dqlockedjoin( struct xfs_trans *tp, struct xfs_dqtrx *q) { unsigned int i; ASSERT(q[0].qt_dquot != NULL); if (q[1].qt_dquot == NULL) { xfs_dqlock(q[0].qt_dquot); xfs_trans_dqjoin(tp, q[0].qt_dquot); } else if (q[2].qt_dquot == NULL) { xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); xfs_trans_dqjoin(tp, q[0].qt_dquot); xfs_trans_dqjoin(tp, q[1].qt_dquot); } else { xfs_dqlockn(q); for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { if (q[i].qt_dquot == NULL) break; xfs_trans_dqjoin(tp, q[i].qt_dquot); } } } /* Apply dqtrx changes to the quota reservation counters. */ static inline void xfs_apply_quota_reservation_deltas( struct xfs_dquot_res *res, uint64_t reserved, int64_t res_used, int64_t count_delta) { if (reserved != 0) { /* * Subtle math here: If reserved > res_used (the normal case), * we're simply subtracting the unused transaction quota * reservation from the dquot reservation. * * If, however, res_used > reserved, then we have allocated * more quota blocks than were reserved for the transaction. * We must add that excess to the dquot reservation since it * tracks (usage + resv) and by definition we didn't reserve * that excess. */ res->reserved -= abs(reserved - res_used); } else if (count_delta != 0) { /* * These blks were never reserved, either inside a transaction * or outside one (in a delayed allocation). Also, this isn't * always a negative number since we sometimes deliberately * skip quota reservations. */ res->reserved += count_delta; } } #ifdef CONFIG_XFS_LIVE_HOOKS /* Call downstream hooks now that it's time to apply dquot deltas. */ static inline void xfs_trans_apply_dquot_deltas_hook( struct xfs_trans *tp, struct xfs_dquot *dqp) { if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { struct xfs_apply_dqtrx_params p = { .tx_id = (uintptr_t)tp, .q_type = xfs_dquot_type(dqp), .q_id = dqp->q_id, }; struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; xfs_hooks_call(&qi->qi_apply_dqtrx_hooks, XFS_APPLY_DQTRX_COMMIT, &p); } } #else # define xfs_trans_apply_dquot_deltas_hook(tp, dqp) ((void)0) #endif /* CONFIG_XFS_LIVE_HOOKS */ /* * Called by xfs_trans_commit() and similar in spirit to * xfs_trans_apply_sb_deltas(). * Go thru all the dquots belonging to this transaction and modify the * INCORE dquot to reflect the actual usages. * Unreserve just the reservations done by this transaction. * dquot is still left locked at exit. 
*/ void xfs_trans_apply_dquot_deltas( struct xfs_trans *tp) { int i, j; struct xfs_dquot *dqp; struct xfs_dqtrx *qtrx, *qa; int64_t totalbdelta; int64_t totalrtbdelta; if (!tp->t_dqinfo) return; ASSERT(tp->t_dqinfo); for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { qa = tp->t_dqinfo->dqs[j]; if (qa[0].qt_dquot == NULL) continue; /* * Lock all of the dquots and join them to the transaction. */ xfs_trans_dqlockedjoin(tp, qa); for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { uint64_t blk_res_used; qtrx = &qa[i]; /* * The array of dquots is filled * sequentially, not sparsely. */ if ((dqp = qtrx->qt_dquot) == NULL) break; ASSERT(XFS_DQ_IS_LOCKED(dqp)); xfs_trans_apply_dquot_deltas_hook(tp, dqp); /* * adjust the actual number of blocks used */ /* * The issue here is - sometimes we don't make a blkquota * reservation intentionally to be fair to users * (when the amount is small). On the other hand, * delayed allocs do make reservations, but that's * outside of a transaction, so we have no * idea how much was really reserved. * So, here we've accumulated delayed allocation blks and * non-delay blks. The assumption is that the * delayed ones are always reserved (outside of a * transaction), and the others may or may not have * quota reservations. */ totalbdelta = qtrx->qt_bcount_delta + qtrx->qt_delbcnt_delta; totalrtbdelta = qtrx->qt_rtbcount_delta + qtrx->qt_delrtb_delta; if (totalbdelta != 0 || totalrtbdelta != 0 || qtrx->qt_icount_delta != 0) { trace_xfs_trans_apply_dquot_deltas_before(dqp); trace_xfs_trans_apply_dquot_deltas(qtrx); } #ifdef DEBUG if (totalbdelta < 0) ASSERT(dqp->q_blk.count >= -totalbdelta); if (totalrtbdelta < 0) ASSERT(dqp->q_rtb.count >= -totalrtbdelta); if (qtrx->qt_icount_delta < 0) ASSERT(dqp->q_ino.count >= -qtrx->qt_icount_delta); #endif if (totalbdelta) dqp->q_blk.count += totalbdelta; if (qtrx->qt_icount_delta) dqp->q_ino.count += qtrx->qt_icount_delta; if (totalrtbdelta) dqp->q_rtb.count += totalrtbdelta; if (totalbdelta != 0 || totalrtbdelta != 0 || qtrx->qt_icount_delta != 0) trace_xfs_trans_apply_dquot_deltas_after(dqp); /* * Get any default limits in use. * Start/reset the timer(s) if needed. */ if (dqp->q_id) { xfs_qm_adjust_dqlimits(dqp); xfs_qm_adjust_dqtimers(dqp); } dqp->q_flags |= XFS_DQFLAG_DIRTY; /* * add this to the list of items to get logged */ xfs_trans_log_dquot(tp, dqp); /* * Take off what's left of the original reservation. * In case of delayed allocations, there's no * reservation that a transaction structure knows of. */ blk_res_used = max_t(int64_t, 0, qtrx->qt_bcount_delta); xfs_apply_quota_reservation_deltas(&dqp->q_blk, qtrx->qt_blk_res, blk_res_used, qtrx->qt_bcount_delta); /* * Adjust the RT reservation. */ xfs_apply_quota_reservation_deltas(&dqp->q_rtb, qtrx->qt_rtblk_res, qtrx->qt_rtblk_res_used, qtrx->qt_rtbcount_delta); /* * Adjust the inode reservation. */ ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); xfs_apply_quota_reservation_deltas(&dqp->q_ino, qtrx->qt_ino_res, qtrx->qt_ino_res_used, qtrx->qt_icount_delta); ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); /* * We've applied the count changes and given back * whatever reservation we didn't use. Zero out the * dqtrx fields. 
*/ qtrx->qt_blk_res = 0; qtrx->qt_bcount_delta = 0; qtrx->qt_delbcnt_delta = 0; qtrx->qt_rtblk_res = 0; qtrx->qt_rtblk_res_used = 0; qtrx->qt_rtbcount_delta = 0; qtrx->qt_delrtb_delta = 0; qtrx->qt_ino_res = 0; qtrx->qt_ino_res_used = 0; qtrx->qt_icount_delta = 0; } } } #ifdef CONFIG_XFS_LIVE_HOOKS /* Call downstream hooks now that it's time to cancel dquot deltas. */ static inline void xfs_trans_unreserve_and_mod_dquots_hook( struct xfs_trans *tp, struct xfs_dquot *dqp) { if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { struct xfs_apply_dqtrx_params p = { .tx_id = (uintptr_t)tp, .q_type = xfs_dquot_type(dqp), .q_id = dqp->q_id, }; struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; xfs_hooks_call(&qi->qi_apply_dqtrx_hooks, XFS_APPLY_DQTRX_UNRESERVE, &p); } } #else # define xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp) ((void)0) #endif /* CONFIG_XFS_LIVE_HOOKS */ /* * Release the reservations, and adjust the dquots accordingly. * This is called only when the transaction is being aborted. If by * any chance we have done dquot modifications incore (ie. deltas) already, * we simply throw those away, since that's the expected behavior * when a transaction is curtailed without a commit. */ void xfs_trans_unreserve_and_mod_dquots( struct xfs_trans *tp, bool already_locked) { int i, j; struct xfs_dquot *dqp; struct xfs_dqtrx *qtrx, *qa; bool locked; if (!tp->t_dqinfo) return; for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { qa = tp->t_dqinfo->dqs[j]; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { qtrx = &qa[i]; /* * We assume that the array of dquots is filled * sequentially, not sparsely. */ if ((dqp = qtrx->qt_dquot) == NULL) break; xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp); /* * Unreserve the original reservation. We don't care * about the number of blocks used field, or deltas. * Also we don't bother to zero the fields. */ locked = already_locked; if (qtrx->qt_blk_res) { if (!locked) { xfs_dqlock(dqp); locked = true; } dqp->q_blk.reserved -= (xfs_qcnt_t)qtrx->qt_blk_res; } if (qtrx->qt_ino_res) { if (!locked) { xfs_dqlock(dqp); locked = true; } dqp->q_ino.reserved -= (xfs_qcnt_t)qtrx->qt_ino_res; } if (qtrx->qt_rtblk_res) { if (!locked) { xfs_dqlock(dqp); locked = true; } dqp->q_rtb.reserved -= (xfs_qcnt_t)qtrx->qt_rtblk_res; } if (locked && !already_locked) xfs_dqunlock(dqp); } } } STATIC void xfs_quota_warn( struct xfs_mount *mp, struct xfs_dquot *dqp, int type) { enum quota_type qtype; switch (xfs_dquot_type(dqp)) { case XFS_DQTYPE_PROJ: qtype = PRJQUOTA; break; case XFS_DQTYPE_USER: qtype = USRQUOTA; break; case XFS_DQTYPE_GROUP: qtype = GRPQUOTA; break; default: return; } quota_send_warning(make_kqid(&init_user_ns, qtype, dqp->q_id), mp->m_super->s_dev, type); } /* * Decide if we can make an additional reservation against a quota resource. * Returns an inode QUOTA_NL_ warning code and whether or not it's fatal. * * Note that we assume that the numeric difference between the inode and block * warning codes will always be 3 since it's userspace ABI now, and will never * decrease the quota reservation, so the *BELOW messages are irrelevant. 
*/ static inline int xfs_dqresv_check( struct xfs_dquot_res *res, struct xfs_quota_limits *qlim, int64_t delta, bool *fatal) { xfs_qcnt_t hardlimit = res->hardlimit; xfs_qcnt_t softlimit = res->softlimit; xfs_qcnt_t total_count = res->reserved + delta; BUILD_BUG_ON(QUOTA_NL_BHARDWARN != QUOTA_NL_IHARDWARN + 3); BUILD_BUG_ON(QUOTA_NL_BSOFTLONGWARN != QUOTA_NL_ISOFTLONGWARN + 3); BUILD_BUG_ON(QUOTA_NL_BSOFTWARN != QUOTA_NL_ISOFTWARN + 3); *fatal = false; if (delta <= 0) return QUOTA_NL_NOWARN; if (!hardlimit) hardlimit = qlim->hard; if (!softlimit) softlimit = qlim->soft; if (hardlimit && total_count > hardlimit) { *fatal = true; return QUOTA_NL_IHARDWARN; } if (softlimit && total_count > softlimit) { time64_t now = ktime_get_real_seconds(); if (res->timer != 0 && now > res->timer) { *fatal = true; return QUOTA_NL_ISOFTLONGWARN; } return QUOTA_NL_ISOFTWARN; } return QUOTA_NL_NOWARN; } /* * This reserves disk blocks and inodes against a dquot. * Flags indicate if the dquot is to be locked here and also * if the blk reservation is for RT or regular blocks. * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. */ STATIC int xfs_trans_dqresv( struct xfs_trans *tp, struct xfs_mount *mp, struct xfs_dquot *dqp, int64_t nblks, long ninos, uint flags) { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_def_quota *defq; struct xfs_dquot_res *blkres; struct xfs_quota_limits *qlim; xfs_dqlock(dqp); defq = xfs_get_defquota(q, xfs_dquot_type(dqp)); if (flags & XFS_TRANS_DQ_RES_BLKS) { blkres = &dqp->q_blk; qlim = &defq->blk; } else { blkres = &dqp->q_rtb; qlim = &defq->rtb; } if ((flags & XFS_QMOPT_FORCE_RES) == 0 && dqp->q_id && xfs_dquot_is_enforced(dqp)) { int quota_nl; bool fatal; /* * dquot is locked already. See if we'd go over the hardlimit * or exceed the timelimit if we'd reserve resources. */ quota_nl = xfs_dqresv_check(blkres, qlim, nblks, &fatal); if (quota_nl != QUOTA_NL_NOWARN) { /* * Quota block warning codes are 3 more than the inode * codes, which we check above. */ xfs_quota_warn(mp, dqp, quota_nl + 3); if (fatal) goto error_return; } quota_nl = xfs_dqresv_check(&dqp->q_ino, &defq->ino, ninos, &fatal); if (quota_nl != QUOTA_NL_NOWARN) { xfs_quota_warn(mp, dqp, quota_nl); if (fatal) goto error_return; } } /* * Change the reservation, but not the actual usage. * Note that q_blk.reserved = q_blk.count + resv */ blkres->reserved += (xfs_qcnt_t)nblks; dqp->q_ino.reserved += (xfs_qcnt_t)ninos; /* * note the reservation amt in the trans struct too, * so that the transaction knows how much was reserved by * it against this particular dquot. * We don't do this when we are reserving for a delayed allocation, * because we don't have the luxury of a transaction envelope then. */ if (tp) { ASSERT(flags & XFS_QMOPT_RESBLK_MASK); xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK, nblks); xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos); } if (XFS_IS_CORRUPT(mp, dqp->q_blk.reserved < dqp->q_blk.count) || XFS_IS_CORRUPT(mp, dqp->q_rtb.reserved < dqp->q_rtb.count) || XFS_IS_CORRUPT(mp, dqp->q_ino.reserved < dqp->q_ino.count)) goto error_corrupt; xfs_dqunlock(dqp); return 0; error_return: xfs_dqunlock(dqp); if (xfs_dquot_type(dqp) == XFS_DQTYPE_PROJ) return -ENOSPC; return -EDQUOT; error_corrupt: xfs_dqunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK); return -EFSCORRUPTED; } /* * Given dquot(s), make disk block and/or inode reservations against them. 
* The fact that this does the reservation against user, group and * project quotas is important, because this follows a all-or-nothing * approach. * * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks * dquots are unlocked on return, if they were not locked by caller. */ int xfs_trans_reserve_quota_bydquots( struct xfs_trans *tp, struct xfs_mount *mp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, int64_t nblks, long ninos, uint flags) { int error; if (!XFS_IS_QUOTA_ON(mp)) return 0; ASSERT(flags & XFS_QMOPT_RESBLK_MASK); if (udqp) { error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, flags); if (error) return error; } if (gdqp) { error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); if (error) goto unwind_usr; } if (pdqp) { error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags); if (error) goto unwind_grp; } /* * Didn't change anything critical, so, no need to log */ return 0; unwind_grp: flags |= XFS_QMOPT_FORCE_RES; if (gdqp) xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags); unwind_usr: flags |= XFS_QMOPT_FORCE_RES; if (udqp) xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); return error; } /* * Lock the dquot and change the reservation if we can. * This doesn't change the actual usage, just the reservation. * The inode sent in is locked. */ int xfs_trans_reserve_quota_nblks( struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force) { struct xfs_mount *mp = ip->i_mount; unsigned int qflags = 0; int error; if (!XFS_IS_QUOTA_ON(mp)) return 0; if (xfs_is_metadir_inode(ip)) return 0; ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (force) qflags |= XFS_QMOPT_FORCE_RES; /* Reserve data device quota against the inode's dquots. */ error = xfs_trans_reserve_quota_bydquots(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, dblocks, 0, XFS_QMOPT_RES_REGBLKS | qflags); if (error) return error; /* Do the same but for realtime blocks. */ error = xfs_trans_reserve_quota_bydquots(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, rblocks, 0, XFS_QMOPT_RES_RTBLKS | qflags); if (error) { xfs_trans_reserve_quota_bydquots(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, -dblocks, 0, XFS_QMOPT_RES_REGBLKS); return error; } return 0; } /* Change the quota reservations for an inode creation activity. */ int xfs_trans_reserve_quota_icreate( struct xfs_trans *tp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, int64_t dblocks) { struct xfs_mount *mp = tp->t_mountp; if (!XFS_IS_QUOTA_ON(mp)) return 0; return xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, pdqp, dblocks, 1, XFS_QMOPT_RES_REGBLKS); } STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { tp->t_dqinfo = kmem_cache_zalloc(xfs_dqtrx_cache, GFP_KERNEL | __GFP_NOFAIL); } void xfs_trans_free_dqinfo( xfs_trans_t *tp) { if (!tp->t_dqinfo) return; kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo); tp->t_dqinfo = NULL; } int xfs_quota_reserve_blkres( struct xfs_inode *ip, int64_t blocks) { if (XFS_IS_REALTIME_INODE(ip)) return xfs_trans_reserve_quota_nblks(NULL, ip, 0, blocks, false); return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false); }
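xfs_trans_reserve_quota_bydquots() above is a textbook all-or-nothing reservation: take each quota in turn and, on failure, undo the ones already taken by re-running the reservation with a negated count and XFS_QMOPT_FORCE_RES, so the undo itself cannot fail. A standalone sketch of just that shape follows; reserve() and reserve_all() are hypothetical stand-ins for xfs_trans_dqresv() and its caller, not XFS code.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical resource with a soft capacity, standing in for a dquot */
struct res { const char *name; long used; long limit; };

static int reserve(struct res *r, long nblks, bool force)
{
	if (!force && r->used + nblks > r->limit)
		return -1;		/* over quota, like -EDQUOT */
	r->used += nblks;
	return 0;
}

/* all-or-nothing: either every resource takes the reservation, or none */
static int reserve_all(struct res *u, struct res *g, struct res *p, long nblks)
{
	if (reserve(u, nblks, false))
		return -1;
	if (reserve(g, nblks, false))
		goto unwind_usr;
	if (reserve(p, nblks, false))
		goto unwind_grp;
	return 0;

unwind_grp:
	reserve(g, -nblks, true);	/* forced undo cannot fail... */
unwind_usr:
	reserve(u, -nblks, true);	/* ...and falls through, as in XFS */
	return -1;
}

int main(void)
{
	struct res u = { "user", 0, 100 }, g = { "group", 0, 100 };
	struct res p = { "proj", 0, 10 };	/* the project quota is tight */

	if (reserve_all(&u, &g, &p, 50))
		printf("failed; user used=%ld group used=%ld (both unwound)\n",
		       u.used, g.used);
	return 0;
}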
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REBALANCE_H
#define _BCACHEFS_REBALANCE_H

#include "compress.h"
#include "disk_groups.h"
#include "opts.h"
#include "rebalance_types.h"

static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c,
								    struct bch_io_opts *opts)
{
	struct bch_extent_rebalance r = {
		.type = BIT(BCH_EXTENT_ENTRY_rebalance),
#define x(_name)							\
		._name = opts->_name,					\
		._name##_from_inode = opts->_name##_from_inode,
	BCH_REBALANCE_OPTS()
#undef x
	};

	if (r.background_target &&
	    !bch2_target_accepts_data(c, BCH_DATA_user, r.background_target))
		r.background_target = 0;

	return r;
}

u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *);
int bch2_get_update_rebalance_opts(struct btree_trans *,
				   struct bch_io_opts *,
				   struct btree_iter *,
				   struct bkey_s_c);

int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64);
int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum);
int bch2_set_fs_needs_rebalance(struct bch_fs *);

static inline void bch2_rebalance_wakeup(struct bch_fs *c)
{
	c->rebalance.kick++;
	guard(rcu)();
	struct task_struct *p = rcu_dereference(c->rebalance.thread);
	if (p)
		wake_up_process(p);
}

void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *);

void bch2_rebalance_stop(struct bch_fs *);
int bch2_rebalance_start(struct bch_fs *);

void bch2_fs_rebalance_exit(struct bch_fs *);
int bch2_fs_rebalance_init(struct bch_fs *);

int bch2_check_rebalance_work(struct bch_fs *);

#endif /* _BCACHEFS_REBALANCE_H */
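io_opts_to_rebalance_opts() relies on an X-macro: BCH_REBALANCE_OPTS() expands a caller-supplied x() once per option, so a single list drives both the struct layout and the field-by-field copy. A self-contained illustration of the idiom, with an invented TOY_OPTS() list and toy_* types that are not bcachefs code:

#include <stdio.h>

#define TOY_OPTS()	\
	x(compression)	\
	x(target)

struct toy_io_opts {
/* one list, first use: declare a pair of fields per option */
#define x(_name) int _name; int _name##_from_inode;
	TOY_OPTS()
#undef x
};

struct toy_rebalance {
#define x(_name) int _name; int _name##_from_inode;
	TOY_OPTS()
#undef x
};

static struct toy_rebalance toy_to_rebalance(const struct toy_io_opts *opts)
{
	struct toy_rebalance r = {
/* same list, second use: copy each option pair in the initializer */
#define x(_name)						\
		._name = opts->_name,				\
		._name##_from_inode = opts->_name##_from_inode,
		TOY_OPTS()
#undef x
	};
	return r;
}

int main(void)
{
	struct toy_io_opts o = { .compression = 3, .target = 7 };
	struct toy_rebalance r = toy_to_rebalance(&o);

	printf("compression=%d target=%d\n", r.compression, r.target);
	return 0;
}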
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
 *
 * Author:
 *      Casey Schaufler <casey@schaufler-ca.com>
 */

#ifndef _SECURITY_SMACK_H
#define _SECURITY_SMACK_H

#include <linux/capability.h>
#include <linux/spinlock.h>
#include <linux/lsm_hooks.h>
#include <linux/in.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#endif /* CONFIG_IPV6 */
#include <net/netlabel.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/lsm_audit.h>
#include <linux/msg.h>

/*
 * Use IPv6 port labeling if IPv6 is enabled and secmarks
 * are not being used.
 */
#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER)
#define SMACK_IPV6_PORT_LABELING 1
#endif

#if IS_ENABLED(CONFIG_IPV6) && defined(CONFIG_SECURITY_SMACK_NETFILTER)
#define SMACK_IPV6_SECMARK_LABELING 1
#endif

/*
 * Smack labels were limited to 23 characters for a long time.
 */
#define SMK_LABELLEN	24
#define SMK_LONGLABEL	256

/*
 * This is the repository for labels seen so that it is
 * not necessary to keep allocating tiny chunks of memory
 * and so that they can be shared.
 *
 * Labels are never modified in place. Anytime a label
 * is imported (e.g. xattrset on a file) the list is checked
 * for it and it is added if it doesn't exist. The address
 * is passed out in either case. Entries are added, but
 * never deleted.
* * Since labels are hanging around anyway it doesn't * hurt to maintain a secid for those awkward situations * where kernel components that ought to use LSM independent * interfaces don't. The secid should go away when all of * these components have been repaired. * * The cipso value associated with the label gets stored here, too. * * Keep the access rules for this subject label here so that * the entire set of rules does not need to be examined every * time. */ struct smack_known { struct list_head list; struct hlist_node smk_hashed; char *smk_known; u32 smk_secid; struct netlbl_lsm_secattr smk_netlabel; /* on wire labels */ struct list_head smk_rules; /* access rules */ struct mutex smk_rules_lock; /* lock for rules */ }; /* * Maximum number of bytes for the levels in a CIPSO IP option. * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is * bigger than can be used, and 24 is the next lower multiple * of 8, and there are too many issues if there isn't space set * aside for the terminating null byte. */ #define SMK_CIPSOLEN 24 struct superblock_smack { struct smack_known *smk_root; struct smack_known *smk_floor; struct smack_known *smk_hat; struct smack_known *smk_default; int smk_flags; }; /* * Superblock flags */ #define SMK_SB_INITIALIZED 0x01 #define SMK_SB_UNTRUSTED 0x02 struct socket_smack { struct smack_known *smk_out; /* outbound label */ struct smack_known *smk_in; /* inbound label */ struct smack_known *smk_packet; /* TCP peer label */ int smk_state; /* netlabel socket states */ }; #define SMK_NETLBL_UNSET 0 #define SMK_NETLBL_UNLABELED 1 #define SMK_NETLBL_LABELED 2 #define SMK_NETLBL_REQSKB 3 /* * Inode smack data */ struct inode_smack { struct smack_known *smk_inode; /* label of the fso */ struct smack_known *smk_task; /* label of the task */ struct smack_known *smk_mmap; /* label of the mmap domain */ int smk_flags; /* smack inode flags */ }; struct task_smack { struct smack_known *smk_task; /* label for access control */ struct smack_known *smk_forked; /* label when forked */ struct smack_known *smk_transmuted;/* label when transmuted */ struct list_head smk_rules; /* per task access rules */ struct mutex smk_rules_lock; /* lock for the rules */ struct list_head smk_relabel; /* transit allowed labels */ }; #define SMK_INODE_INSTANT 0x01 /* inode is instantiated */ #define SMK_INODE_TRANSMUTE 0x02 /* directory is transmuting */ #define SMK_INODE_CHANGED 0x04 /* smack was transmuted (unused) */ #define SMK_INODE_IMPURE 0x08 /* involved in an impure transaction */ /* * A label access rule. */ struct smack_rule { struct list_head list; struct smack_known *smk_subject; struct smack_known *smk_object; int smk_access; }; /* * An entry in the table identifying IPv4 hosts. */ struct smk_net4addr { struct list_head list; struct in_addr smk_host; /* network address */ struct in_addr smk_mask; /* network mask */ int smk_masks; /* mask size */ struct smack_known *smk_label; /* label */ }; #if IS_ENABLED(CONFIG_IPV6) /* * An entry in the table identifying IPv6 hosts. */ struct smk_net6addr { struct list_head list; struct in6_addr smk_host; /* network address */ struct in6_addr smk_mask; /* network mask */ int smk_masks; /* mask size */ struct smack_known *smk_label; /* label */ }; #endif /* CONFIG_IPV6 */ #ifdef SMACK_IPV6_PORT_LABELING /* * An entry in the table identifying ports. 
*/ struct smk_port_label { struct list_head list; struct sock *smk_sock; /* socket initialized on */ unsigned short smk_port; /* the port number */ struct smack_known *smk_in; /* inbound label */ struct smack_known *smk_out; /* outgoing label */ short smk_sock_type; /* Socket type */ short smk_can_reuse; }; #endif /* SMACK_IPV6_PORT_LABELING */ struct smack_known_list_elem { struct list_head list; struct smack_known *smk_label; }; enum { Opt_error = -1, Opt_fsdefault = 0, Opt_fsfloor = 1, Opt_fshat = 2, Opt_fsroot = 3, Opt_fstransmute = 4, }; #define SMACK_DELETE_OPTION "-DELETE" #define SMACK_CIPSO_OPTION "-CIPSO" /* * CIPSO defaults. */ #define SMACK_CIPSO_DOI_DEFAULT 3 /* Historical */ #define SMACK_CIPSO_DOI_INVALID -1 /* Not a DOI */ #define SMACK_CIPSO_DIRECT_DEFAULT 250 /* Arbitrary */ #define SMACK_CIPSO_MAPPED_DEFAULT 251 /* Also arbitrary */ #define SMACK_CIPSO_MAXLEVEL 255 /* CIPSO 2.2 standard */ /* * CIPSO 2.2 standard is 239, but Smack wants to use the * categories in a structured way that limits the value to * the bits in 23 bytes, hence the unusual number. */ #define SMACK_CIPSO_MAXCATNUM 184 /* 23 * 8 */ /* * Ptrace rules */ #define SMACK_PTRACE_DEFAULT 0 #define SMACK_PTRACE_EXACT 1 #define SMACK_PTRACE_DRACONIAN 2 #define SMACK_PTRACE_MAX SMACK_PTRACE_DRACONIAN /* * Flags for untraditional access modes. * It shouldn't be necessary to avoid conflicts with definitions * in fs.h, but do so anyway. */ #define MAY_TRANSMUTE 0x00001000 /* Controls directory labeling */ #define MAY_LOCK 0x00002000 /* Locks should be writes, but ... */ #define MAY_BRINGUP 0x00004000 /* Report use of this rule */ /* * The policy for delivering signals is configurable. * It is usually "write", but can be "append". */ #ifdef CONFIG_SECURITY_SMACK_APPEND_SIGNALS #define MAY_DELIVER MAY_APPEND /* Signal delivery requires append */ #else #define MAY_DELIVER MAY_WRITE /* Signal delivery requires write */ #endif #define SMACK_BRINGUP_ALLOW 1 /* Allow bringup mode */ #define SMACK_UNCONFINED_SUBJECT 2 /* Allow unconfined label */ #define SMACK_UNCONFINED_OBJECT 3 /* Allow unconfined label */ /* * Just to make the common cases easier to deal with */ #define MAY_ANYREAD (MAY_READ | MAY_EXEC) #define MAY_READWRITE (MAY_READ | MAY_WRITE) #define MAY_NOT 0 /* * Number of access types used by Smack (rwxatlb) */ #define SMK_NUM_ACCESS_TYPE 7 /* SMACK data */ struct smack_audit_data { const char *function; char *subject; char *object; char *request; int result; }; /* * Smack audit data; is empty if CONFIG_AUDIT not set * to save some stack */ struct smk_audit_info { #ifdef CONFIG_AUDIT struct common_audit_data a; struct smack_audit_data sad; #endif }; /* * These functions are in smack_access.c */ int smk_access_entry(char *, char *, struct list_head *); int smk_access(struct smack_known *, struct smack_known *, int, struct smk_audit_info *); int smk_tskacc(struct task_smack *, struct smack_known *, u32, struct smk_audit_info *); int smk_curacc(struct smack_known *, u32, struct smk_audit_info *); int smack_str_from_perm(char *string, int access); struct smack_known *smack_from_secid(const u32); char *smk_parse_smack(const char *string, int len); int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int); struct smack_known *smk_import_entry(const char *, int); void smk_insert_entry(struct smack_known *skp); struct smack_known *smk_find_entry(const char *); bool smack_privileged(int cap); bool smack_privileged_cred(int cap, const struct cred *cred); void smk_destroy_label_list(struct list_head *list); int 
smack_populate_secattr(struct smack_known *skp); /* * Shared data. */ extern int smack_enabled __initdata; extern int smack_cipso_direct; extern int smack_cipso_mapped; extern struct smack_known *smack_net_ambient; extern struct smack_known *smack_syslog_label; #ifdef CONFIG_SECURITY_SMACK_BRINGUP extern struct smack_known *smack_unconfined; #endif extern int smack_ptrace_rule; extern struct lsm_blob_sizes smack_blob_sizes; extern struct smack_known smack_known_floor; extern struct smack_known smack_known_hat; extern struct smack_known smack_known_huh; extern struct smack_known smack_known_star; extern struct smack_known smack_known_web; extern struct mutex smack_known_lock; extern struct list_head smack_known_list; extern struct list_head smk_net4addr_list; #if IS_ENABLED(CONFIG_IPV6) extern struct list_head smk_net6addr_list; #endif /* CONFIG_IPV6 */ extern struct mutex smack_onlycap_lock; extern struct list_head smack_onlycap_list; #define SMACK_HASH_SLOTS 16 extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS]; extern struct kmem_cache *smack_rule_cache; static inline struct task_smack *smack_cred(const struct cred *cred) { return cred->security + smack_blob_sizes.lbs_cred; } static inline struct smack_known **smack_file(const struct file *file) { return (struct smack_known **)(file->f_security + smack_blob_sizes.lbs_file); } static inline struct inode_smack *smack_inode(const struct inode *inode) { return inode->i_security + smack_blob_sizes.lbs_inode; } static inline struct smack_known **smack_msg_msg(const struct msg_msg *msg) { return msg->security + smack_blob_sizes.lbs_msg_msg; } static inline struct smack_known **smack_ipc(const struct kern_ipc_perm *ipc) { return ipc->security + smack_blob_sizes.lbs_ipc; } static inline struct superblock_smack *smack_superblock( const struct super_block *superblock) { return superblock->s_security + smack_blob_sizes.lbs_superblock; } static inline struct socket_smack *smack_sock(const struct sock *sock) { return sock->sk_security + smack_blob_sizes.lbs_sock; } #ifdef CONFIG_KEYS static inline struct smack_known **smack_key(const struct key *key) { return key->security + smack_blob_sizes.lbs_key; } #endif /* CONFIG_KEYS */ /* * Is the directory transmuting? */ static inline int smk_inode_transmutable(const struct inode *isp) { struct inode_smack *sip = smack_inode(isp); return (sip->smk_flags & SMK_INODE_TRANSMUTE) != 0; } /* * Present a pointer to the smack label entry in an inode blob. */ static inline struct smack_known *smk_of_inode(const struct inode *isp) { struct inode_smack *sip = smack_inode(isp); return sip->smk_inode; } /* * Present a pointer to the smack label entry in an task blob. */ static inline struct smack_known *smk_of_task(const struct task_smack *tsp) { return tsp->smk_task; } static inline struct smack_known *smk_of_task_struct_obj( const struct task_struct *t) { struct smack_known *skp; const struct cred *cred; rcu_read_lock(); cred = __task_cred(t); skp = smk_of_task(smack_cred(cred)); rcu_read_unlock(); return skp; } /* * Present a pointer to the forked smack label entry in an task blob. */ static inline struct smack_known *smk_of_forked(const struct task_smack *tsp) { return tsp->smk_forked; } /* * Present a pointer to the smack label in the current task blob. 
*/ static inline struct smack_known *smk_of_current(void) { return smk_of_task(smack_cred(current_cred())); } void smack_log(char *subject_label, char *object_label, int request, int result, struct smk_audit_info *auditdata); #ifdef CONFIG_AUDIT /* * logging functions */ #define SMACK_AUDIT_DENIED 0x1 #define SMACK_AUDIT_ACCEPT 0x2 extern int log_policy; /* * some inline functions to set up audit data * they do nothing if CONFIG_AUDIT is not set * */ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, char type) { memset(&a->sad, 0, sizeof(a->sad)); a->a.type = type; a->a.smack_audit_data = &a->sad; a->a.smack_audit_data->function = func; } static inline void smk_ad_init_net(struct smk_audit_info *a, const char *func, char type, struct lsm_network_audit *net) { smk_ad_init(a, func, type); memset(net, 0, sizeof(*net)); a->a.u.net = net; } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, struct task_struct *t) { a->a.u.tsk = t; } static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, struct dentry *d) { a->a.u.dentry = d; } static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, struct inode *i) { a->a.u.inode = i; } static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, struct path p) { a->a.u.path = p; } static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, struct sock *sk) { a->a.u.net->sk = sk; } #else /* no AUDIT */ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, char type) { } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, struct task_struct *t) { } static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, struct dentry *d) { } static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, struct inode *i) { } static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, struct path p) { } static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, struct sock *sk) { } #endif #endif /* _SECURITY_SMACK_H */
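The smack_cred()/smack_inode()/smack_file() accessors above all follow the same LSM blob pattern: the LSM core hands Smack a set of byte offsets (struct lsm_blob_sizes) into a security blob shared by every active LSM, and each accessor simply adds its offset to the object's security pointer. A loose userspace sketch of the idea, with invented toy_* names and an arbitrary offset standing in for lbs_cred:

#include <stdio.h>
#include <stdlib.h>

struct toy_task_data { int label; };

/* offset handed to this "LSM" at init time; 8 is arbitrary here */
static const size_t toy_cred_offset = 8;

/* like smack_cred(): find our slice inside the shared blob */
static struct toy_task_data *toy_cred(void *security_blob)
{
	return (struct toy_task_data *)((char *)security_blob + toy_cred_offset);
}

int main(void)
{
	/* one blob per object; the first 8 bytes belong to another LSM */
	void *blob = calloc(1, toy_cred_offset + sizeof(struct toy_task_data));

	if (!blob)
		return 1;
	toy_cred(blob)->label = 42;
	printf("label=%d\n", toy_cred(blob)->label);
	free(blob);
	return 0;
}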
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/userfaultfd_k.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/processor.h>
#include <linux/sizes.h>
#include <linux/compat.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <kunit/visibility.h>

#include "internal.h"
#include "swap.h"

/**
 * kfree_const - conditionally free memory
 * @x: pointer to the memory
 *
 * Function calls kfree only if @x is not in .rodata section.
 */
void kfree_const(const void *x)
{
	if (!is_kernel_rodata((unsigned long)x))
		kfree(x);
}
EXPORT_SYMBOL(kfree_const);

/**
 * __kmemdup_nul - Create a NUL-terminated string from @s, which might be unterminated.
 * @s: The data to copy
 * @len: The size of the data, not including the NUL terminator
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Return: newly allocated copy of @s with NUL-termination or %NULL in
 * case of error
 */
static __always_inline char *__kmemdup_nul(const char *s, size_t len, gfp_t gfp)
{
	char *buf;

	/* '+1' for the NUL terminator */
	buf = kmalloc_track_caller(len + 1, gfp);
	if (!buf)
		return NULL;

	memcpy(buf, s, len);
	/* Ensure the buf is always NUL-terminated, regardless of @s. */
	buf[len] = '\0';
	return buf;
}

/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Return: newly allocated copy of @s or %NULL in case of error
 */
noinline
char *kstrdup(const char *s, gfp_t gfp)
{
	return s ?
/**
 * kstrndup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Note: Use kmemdup_nul() instead if the size is known exactly.
 *
 * Return: newly allocated copy of @s or %NULL in case of error
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
	return s ? __kmemdup_nul(s, strnlen(s, max), gfp) : NULL;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 *
 * Return: newly allocated copy of @src or %NULL in case of error,
 * result is physically contiguous. Use kfree() to free.
 */
void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kmemdup_noprof);

/**
 * kmemdup_array - duplicate a given array.
 *
 * @src: array to duplicate.
 * @count: number of elements to duplicate from array.
 * @element_size: size of each element of array.
 * @gfp: GFP mask to use.
 *
 * Return: duplicated array of @src or %NULL in case of error,
 * result is physically contiguous. Use kfree() to free.
 */
void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
{
	return kmemdup(src, size_mul(element_size, count), gfp);
}
EXPORT_SYMBOL(kmemdup_array);

/**
 * kvmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 *
 * Return: newly allocated copy of @src or %NULL in case of error,
 * result may be not physically contiguous. Use kvfree() to free.
 */
void *kvmemdup(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kvmalloc(len, gfp);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kvmemdup);

/**
 * kmemdup_nul - Create a NUL-terminated string from unterminated data
 * @s: The data to stringify
 * @len: The size of the data
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Return: newly allocated copy of @s with NUL-termination or %NULL in
 * case of error
 */
char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
{
	return s ? __kmemdup_nul(s, len, gfp) : NULL;
}
EXPORT_SYMBOL(kmemdup_nul);

static kmem_buckets *user_buckets __ro_after_init;

static int __init init_user_buckets(void)
{
	user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);

	return 0;
}
subsys_initcall(init_user_buckets);

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure. Result is physically
 * contiguous, to be freed by kfree().
 */
void *memdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(memdup_user);
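/*
 * Usage sketch (illustrative only; 'example_cmd' and the surrounding ioctl
 * shape are hypothetical): memdup_user() returns an ERR_PTR(), never NULL,
 * so callers check the result with IS_ERR():
 */
#if 0
static long example_ioctl_set(const void __user *uarg, size_t len)
{
	struct example_cmd *cmd = memdup_user(uarg, len);

	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	/* ... act on the kernel-space copy ... */
	kfree(cmd);
	return 0;
}
#endif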
/**
 * vmemdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure. Result may be not
 * physically contiguous. Use kvfree() to free.
 */
void *vmemdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kvfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(vmemdup_user);

/**
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 *
 * Return: newly allocated copy of @s or an ERR_PTR() in case of error
 */
char *strndup_user(const char __user *s, long n)
{
	char *p;
	long length;

	length = strnlen_user(s, n);

	if (!length)
		return ERR_PTR(-EFAULT);

	if (length > n)
		return ERR_PTR(-EINVAL);

	p = memdup_user(s, length);

	if (IS_ERR(p))
		return p;

	p[length - 1] = '\0';

	return p;
}
EXPORT_SYMBOL(strndup_user);

/**
 * memdup_user_nul - duplicate memory region from user space and NUL-terminate
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure.
 */
void *memdup_user_nul(const void __user *src, size_t len)
{
	char *p;

	p = kmem_buckets_alloc_track_caller(user_buckets, len + 1, GFP_USER | __GFP_NOWARN);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';

	return p;
}
EXPORT_SYMBOL(memdup_user_nul);

/* Check if the vma is being used as a stack by this task */
int vma_is_stack_for_current(struct vm_area_struct *vma)
{
	struct task_struct * __maybe_unused t = current;

	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

/*
 * Change backing file, only valid to use during initial VMA setup.
 */
void vma_set_file(struct vm_area_struct *vma, struct file *file)
{
	/* Changing an anonymous vma with this is illegal */
	get_file(file);
	swap(vma->vm_file, file);
	fput(file);
}
EXPORT_SYMBOL(vma_set_file);

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned long random_variable = 0;

	if (current->flags & PF_RANDOMIZE) {
		random_variable = get_random_long();
		random_variable &= STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
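/*
 * Worked example (illustrative, assuming 4 KiB pages, i.e. PAGE_SHIFT == 12):
 * STACK_RND_MASK is then 0x7ff, so random_variable is at most 0x7ff pages,
 * i.e. 2047 * 4 KiB ~= 8 MiB of randomisation below (or above, for
 * upward-growing stacks) the page-aligned stack top - matching the
 * "8MB of VA" note next to the definition.
 */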
/**
 * randomize_page - Generate a random, page aligned address
 * @start: The smallest acceptable address the caller will take.
 * @range: The size of the area, starting at @start, within which the
 *         random address must fall.
 *
 * If @start + @range would overflow, @range is capped.
 *
 * NOTE: Historical use of randomize_range, which this replaces, presumed that
 * @start was already page aligned. We now align it regardless.
 *
 * Return: A page aligned address within [start, start + range). On error,
 * @start is returned.
 */
unsigned long randomize_page(unsigned long start, unsigned long range)
{
	if (!PAGE_ALIGNED(start)) {
		range -= PAGE_ALIGN(start) - start;
		start = PAGE_ALIGN(start);
	}

	if (start > ULONG_MAX - range)
		range = ULONG_MAX - start;

	range >>= PAGE_SHIFT;

	if (range == 0)
		return start;

	return start + (get_random_long() % range << PAGE_SHIFT);
}

#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
{
	/* Is the current task 32bit ? */
	if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
		return randomize_page(mm->brk, SZ_32M);

	return randomize_page(mm->brk, SZ_1G);
}

unsigned long arch_mmap_rnd(void)
{
	unsigned long rnd;

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
	if (is_compat_task())
		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
	else
#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */
		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);

	return rnd << PAGE_SHIFT;
}

static int mmap_is_legacy(struct rlimit *rlim_stack)
{
	if (current->personality & ADDR_COMPAT_LAYOUT)
		return 1;

	/* On parisc the stack always grows up - so an unlimited stack should
	 * not be an indicator to use the legacy memory layout. */
	if (rlim_stack->rlim_cur == RLIM_INFINITY &&
	    !IS_ENABLED(CONFIG_STACK_GROWSUP))
		return 1;

	return sysctl_legacy_va_layout;
}

/*
 * Leave enough space between the mmap area and the stack to honour ulimit in
 * the face of randomisation.
 */
#define MIN_GAP		(SZ_128M)
#define MAX_GAP		(STACK_TOP / 6 * 5)

static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
#ifdef CONFIG_STACK_GROWSUP
	/*
	 * For an upwards growing stack the calculation is much simpler.
	 * Memory for the maximum stack size is reserved at the top of the
	 * task. mmap_base starts directly below the stack and grows
	 * downwards.
	 */
	return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
#else
	unsigned long gap = rlim_stack->rlim_cur;
	unsigned long pad = stack_guard_gap;

	/* Account for stack randomization if necessary */
	if (current->flags & PF_RANDOMIZE)
		pad += (STACK_RND_MASK << PAGE_SHIFT);

	/* Values close to RLIM_INFINITY can overflow. */
	if (gap + pad > gap)
		gap += pad;

	if (gap < MIN_GAP && MIN_GAP < MAX_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;

	return PAGE_ALIGN(STACK_TOP - gap - rnd);
#endif
}

void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
	unsigned long random_factor = 0UL;

	if (current->flags & PF_RANDOMIZE)
		random_factor = arch_mmap_rnd();

	if (mmap_is_legacy(rlim_stack)) {
		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
		clear_bit(MMF_TOPDOWN, &mm->flags);
	} else {
		mm->mmap_base = mmap_base(random_factor, rlim_stack);
		set_bit(MMF_TOPDOWN, &mm->flags);
	}
}
#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
	mm->mmap_base = TASK_UNMAPPED_BASE;
	clear_bit(MMF_TOPDOWN, &mm->flags);
}
#endif
#ifdef CONFIG_MMU
EXPORT_SYMBOL_IF_KUNIT(arch_pick_mmap_layout);
#endif
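/*
 * Worked example for mmap_base() (illustrative numbers: downward-growing
 * stack, 4 KiB pages, the common 1 MiB stack_guard_gap default): with
 * RLIMIT_STACK at 8 MiB and PF_RANDOMIZE adding STACK_RND_MASK << PAGE_SHIFT
 * (~8 MiB) of pad, gap comes to ~17 MiB, which is then raised to MIN_GAP
 * (128 MiB); mmap_base therefore lands at PAGE_ALIGN(STACK_TOP - 128M - rnd).
 */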
/**
 * __account_locked_vm - account locked pages to an mm's locked_vm
 * @mm:          mm to account against
 * @pages:       number of pages to account
 * @inc:         %true if @pages should be considered positive, %false if not
 * @task:        task used to check RLIMIT_MEMLOCK
 * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
 *
 * Assumes @task and @mm are valid (i.e. at least one reference on each), and
 * that mmap_lock is held as writer.
 *
 * Return:
 * * 0       on success
 * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
 */
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
			struct task_struct *task, bool bypass_rlim)
{
	unsigned long locked_vm, limit;
	int ret = 0;

	mmap_assert_write_locked(mm);

	locked_vm = mm->locked_vm;
	if (inc) {
		if (!bypass_rlim) {
			limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
			if (locked_vm + pages > limit)
				ret = -ENOMEM;
		}
		if (!ret)
			mm->locked_vm = locked_vm + pages;
	} else {
		WARN_ON_ONCE(pages > locked_vm);
		mm->locked_vm = locked_vm - pages;
	}

	pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
		 (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
		 locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
		 ret ? " - exceeded" : "");

	return ret;
}
EXPORT_SYMBOL_GPL(__account_locked_vm);

/**
 * account_locked_vm - account locked pages to an mm's locked_vm
 * @mm:          mm to account against, may be NULL
 * @pages:       number of pages to account
 * @inc:         %true if @pages should be considered positive, %false if not
 *
 * Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
 *
 * Return:
 * * 0       on success, or if mm is NULL
 * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
 */
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
{
	int ret;

	if (pages == 0 || !mm)
		return 0;

	mmap_write_lock(mm);
	ret = __account_locked_vm(mm, pages, inc, current,
				  capable(CAP_IPC_LOCK));
	mmap_write_unlock(mm);

	return ret;
}
EXPORT_SYMBOL_GPL(account_locked_vm);
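/*
 * Usage sketch (illustrative; the calling context and example_pin_pages()
 * are hypothetical): a driver that pins user pages typically charges them
 * up front and undoes the charge with inc=false on the error path:
 */
#if 0
	ret = account_locked_vm(current->mm, nr_pages, true);
	if (ret)
		return ret;	/* would exceed RLIMIT_MEMLOCK */

	ret = example_pin_pages();	/* hypothetical */
	if (ret)
		account_locked_vm(current->mm, nr_pages, false);
#endif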
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;
	LIST_HEAD(uf);

	ret = security_mmap_file(file, prot, flag);
	if (!ret)
		ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
	if (!ret) {
		if (mmap_write_lock_killable(mm))
			return -EINTR;
		ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate,
			      &uf);
		mmap_write_unlock(mm);
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

/*
 * Perform a userland memory mapping into the current process address space. See
 * the comment for do_mmap() for more details on this operation in general.
 *
 * This differs from do_mmap() in that:
 *
 * a. An offset parameter is provided rather than pgoff, and it is checked
 *    for both overflow and page alignment.
 * b. mmap locking is performed on the caller's behalf.
 * c. Userfaultfd unmap events and memory population are handled.
 *
 * This means that this function performs essentially the same work as if
 * userland were invoking mmap(2).
 *
 * Returns either an error, or the address at which the requested mapping has
 * been performed.
 */
unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset_in_page(offset)))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

/**
 * __vmalloc_array - allocate memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;
	return __vmalloc_noprof(bytes, flags);
}
EXPORT_SYMBOL(__vmalloc_array_noprof);

/**
 * vmalloc_array - allocate memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 */
void *vmalloc_array_noprof(size_t n, size_t size)
{
	return __vmalloc_array_noprof(n, size, GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc_array_noprof);

/**
 * __vcalloc - allocate and zero memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
{
	return __vmalloc_array_noprof(n, size, flags | __GFP_ZERO);
}
EXPORT_SYMBOL(__vcalloc_noprof);

/**
 * vcalloc - allocate and zero memory for a virtually contiguous array.
 * @n: number of elements.
 * @size: element size.
 */
void *vcalloc_noprof(size_t n, size_t size)
{
	return __vmalloc_array_noprof(n, size, GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vcalloc_noprof);
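/*
 * Usage sketch (illustrative; 'struct example_entry' is hypothetical):
 * vmalloc_array()/vcalloc() are the vmalloc counterparts of kmalloc_array()
 * and kcalloc(), with the same overflow-checked n * size multiplication.
 * The result is only virtually contiguous and is freed with vfree():
 */
#if 0
	struct example_entry *tbl = vcalloc(nr_entries, sizeof(*tbl));

	if (!tbl)
		return -ENOMEM;
	/* ... */
	vfree(tbl);
#endif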
struct anon_vma *folio_anon_vma(const struct folio *folio)
{
	unsigned long mapping = (unsigned long)folio->mapping;

	if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		return NULL;

	return (void *)(mapping - PAGE_MAPPING_ANON);
}

/**
 * folio_mapping - Find the mapping where this folio is stored.
 * @folio: The folio.
 *
 * For folios which are in the page cache, return the mapping that this
 * page belongs to. Folios in the swap cache return the swap mapping
 * this page is stored in (which is different from the mapping for the
 * swap file or swap device where the data is stored).
 *
 * You can call this for folios which aren't in the swap cache or page
 * cache and it will return NULL.
 */
struct address_space *folio_mapping(struct folio *folio)
{
	struct address_space *mapping;

	/* This happens if someone calls flush_dcache_page on slab page */
	if (unlikely(folio_test_slab(folio)))
		return NULL;

	if (unlikely(folio_test_swapcache(folio)))
		return swap_address_space(folio->swap);

	mapping = folio->mapping;
	if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
		return NULL;

	return mapping;
}
EXPORT_SYMBOL(folio_mapping);

/**
 * folio_copy - Copy the contents of one folio to another.
 * @dst: Folio to copy to.
 * @src: Folio to copy from.
 *
 * The bytes in the folio represented by @src are copied to @dst.
 * Assumes the caller has validated that @dst is at least as large as @src.
 * Can be called in atomic context for order-0 folios, but if the folio is
 * larger, it may sleep.
 */
void folio_copy(struct folio *dst, struct folio *src)
{
	long i = 0;
	long nr = folio_nr_pages(src);

	for (;;) {
		copy_highpage(folio_page(dst, i), folio_page(src, i));
		if (++i == nr)
			break;
		cond_resched();
	}
}
EXPORT_SYMBOL(folio_copy);

int folio_mc_copy(struct folio *dst, struct folio *src)
{
	long nr = folio_nr_pages(src);
	long i = 0;

	for (;;) {
		if (copy_mc_highpage(folio_page(dst, i), folio_page(src, i)))
			return -EHWPOISON;
		if (++i == nr)
			break;
		cond_resched();
	}

	return 0;
}
EXPORT_SYMBOL(folio_mc_copy);
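/*
 * Usage sketch (illustrative): a typical caller distinguishes page-cache
 * (and swap-cache) folios from anonymous or slab ones by whether
 * folio_mapping() returned an address_space:
 */
#if 0
	struct address_space *mapping = folio_mapping(folio);

	if (!mapping)
		return;		/* anon, slab or otherwise unmapped folio */
	/* ... operate on mapping ... */
#endif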
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
static int sysctl_overcommit_ratio __read_mostly = 50;
static unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

#ifdef CONFIG_SYSCTL

static int overcommit_ratio_handler(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

static void sync_overcommit_as(struct work_struct *dummy)
{
	percpu_counter_sync(&vm_committed_as);
}

static int overcommit_policy_handler(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table t;
	int new_policy = -1;
	int ret;

	/*
	 * The deviation of sync_overcommit_as could be big with loose policy
	 * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
	 * the strict OVERCOMMIT_NEVER, we need to reduce the deviation to
	 * comply with the strict "NEVER", and to avoid a possible race
	 * condition (even though the user usually won't switch policies
	 * frequently), the switch is done in the following order:
	 *	1. changing the batch
	 *	2. sync percpu count on each CPU
	 *	3. switch the policy
	 */
	if (write) {
		t = *table;
		t.data = &new_policy;
		ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
		if (ret || new_policy == -1)
			return ret;

		mm_compute_batch(new_policy);
		if (new_policy == OVERCOMMIT_NEVER)
			schedule_on_each_cpu(sync_overcommit_as);
		sysctl_overcommit_memory = new_policy;
	} else {
		ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	}

	return ret;
}

static int overcommit_kbytes_handler(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

static const struct ctl_table util_sysctl_table[] = {
	{
		.procname	= "overcommit_memory",
		.data		= &sysctl_overcommit_memory,
		.maxlen		= sizeof(sysctl_overcommit_memory),
		.mode		= 0644,
		.proc_handler	= overcommit_policy_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "overcommit_ratio",
		.data		= &sysctl_overcommit_ratio,
		.maxlen		= sizeof(sysctl_overcommit_ratio),
		.mode		= 0644,
		.proc_handler	= overcommit_ratio_handler,
	},
	{
		.procname	= "overcommit_kbytes",
		.data		= &sysctl_overcommit_kbytes,
		.maxlen		= sizeof(sysctl_overcommit_kbytes),
		.mode		= 0644,
		.proc_handler	= overcommit_kbytes_handler,
	},
	{
		.procname	= "user_reserve_kbytes",
		.data		= &sysctl_user_reserve_kbytes,
		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "admin_reserve_kbytes",
		.data		= &sysctl_admin_reserve_kbytes,
		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
};

static int __init init_vm_util_sysctls(void)
{
	register_sysctl_init("vm", util_sysctl_table);
	return 0;
}
subsys_initcall(init_vm_util_sysctls);
#endif /* CONFIG_SYSCTL */

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages() - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}

/*
 * Make sure vm_committed_as is in one cacheline and not cacheline shared
 * with other variables. It can be updated by several CPUs frequently.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux is hosted
 * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 * balancing memory across competing virtual machines that are hosted.
 * Several metrics drive this policy engine including the guest reported
 * memory commitment.
 *
 * The time cost of this is very low for small platforms, and for a big
 * platform like a 2S/36C/72T Skylake server, in the worst case where
 * vm_committed_as's spinlock is under severe contention, the time cost
 * could be about 30~40 microseconds.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_sum_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
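/*
 * Worked example for vm_commit_limit() (illustrative numbers, 4 KiB pages):
 * with overcommit_kbytes unset, overcommit_ratio at its default of 50,
 * 16 GiB of RAM (4M pages), no hugetlb pages and 4 GiB of swap (1M pages),
 * the commit limit is 4M * 50 / 100 + 1M = 3M pages, i.e. 12 GiB.
 */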
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl. See Documentation/mm/overcommit-accounting.rst
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long allowed;
	unsigned long bytes_failed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		if (pages > totalram_pages() + total_swap_pages)
			goto error;
		return 0;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		long reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);

		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	bytes_failed = pages << PAGE_SHIFT;
	pr_warn_ratelimited("%s: pid: %d, comm: %s, bytes: %lu not enough memory for the allocation\n",
			    __func__, current->pid, current->comm, bytes_failed);
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task:   the task whose cmdline value to copy.
 * @buffer: the buffer to copy to.
 * @buflen: the length of the buffer. Larger cmdline values are truncated
 *          to this length.
 *
 * Return: the size of the cmdline field copied. Note that the copy does
 * not guarantee an ending NULL byte.
 */
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	unsigned long arg_start, arg_end, env_start, env_end;

	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	spin_lock(&mm->arg_lock);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	len = arg_end - arg_start;

	if (len > buflen)
		len = buflen;

	res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);

	/*
	 * If the nul at the end of args has been overwritten, then
	 * assume application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
		len = strnlen(buffer, res);
		if (len < res) {
			res = len;
		} else {
			len = env_end - env_start;
			if (len > buflen - res)
				len = buflen - res;
			res += access_process_vm(task, env_start,
						 buffer+res, len,
						 FOLL_FORCE);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}

int __weak memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = kmap_local_page(page1);
	addr2 = kmap_local_page(page2);
	ret = memcmp(addr1, addr2, PAGE_SIZE);
	kunmap_local(addr2);
	kunmap_local(addr1);
	return ret;
}
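/*
 * Usage sketch (illustrative): get_cmdline() does not NUL-terminate, so the
 * caller bounds any further string handling by the returned length:
 */
#if 0
	char buf[256];
	int len = get_cmdline(task, buf, sizeof(buf));

	pr_info("cmdline: %.*s\n", len, buf);
#endif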
#ifdef CONFIG_PRINTK
/**
 * mem_dump_obj - Print available provenance information
 * @object: object for which to find provenance information.
 *
 * This function uses pr_cont(), so that the caller is expected to have
 * printed out whatever preamble is appropriate. The provenance information
 * depends on the type of object and on how much debugging is enabled.
 * For example, for a slab-cache object, the slab name is printed, and,
 * if available, the return address and stack trace from the allocation
 * and last free path of that object.
 */
void mem_dump_obj(void *object)
{
	const char *type;

	if (kmem_dump_obj(object))
		return;

	if (vmalloc_dump_obj(object))
		return;

	if (is_vmalloc_addr(object))
		type = "vmalloc memory";
	else if (virt_addr_valid(object))
		type = "non-slab/vmalloc memory";
	else if (object == NULL)
		type = "NULL pointer";
	else if (object == ZERO_SIZE_PTR)
		type = "zero-size pointer";
	else
		type = "non-paged memory";

	pr_cont(" %s\n", type);
}
EXPORT_SYMBOL_GPL(mem_dump_obj);
#endif

/*
 * A driver might set a page logically offline -- PageOffline() -- and
 * turn the page inaccessible in the hypervisor; after that, access to page
 * content can be fatal.
 *
 * Some special PFN walkers -- i.e., /proc/kcore -- read content of random
 * pages after checking PageOffline(); however, these PFN walkers can race
 * with drivers that set PageOffline().
 *
 * page_offline_freeze()/page_offline_thaw() allows for a subsystem to
 * synchronize with such drivers, achieving that a page cannot be set
 * PageOffline() while frozen.
 *
 * page_offline_begin()/page_offline_end() is used by drivers that care about
 * such races when setting a page PageOffline().
 */
static DECLARE_RWSEM(page_offline_rwsem);

void page_offline_freeze(void)
{
	down_read(&page_offline_rwsem);
}

void page_offline_thaw(void)
{
	up_read(&page_offline_rwsem);
}

void page_offline_begin(void)
{
	down_write(&page_offline_rwsem);
}
EXPORT_SYMBOL(page_offline_begin);

void page_offline_end(void)
{
	up_write(&page_offline_rwsem);
}
EXPORT_SYMBOL(page_offline_end);

#ifndef flush_dcache_folio
void flush_dcache_folio(struct folio *folio)
{
	long i, nr = folio_nr_pages(folio);

	for (i = 0; i < nr; i++)
		flush_dcache_page(folio_page(folio, i));
}
EXPORT_SYMBOL(flush_dcache_folio);
#endif

/**
 * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an
 * existing VMA
 * @file: The file which possesses an f_op->mmap_prepare() hook
 * @vma: The VMA to apply the .mmap_prepare() hook to.
 *
 * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
 * 'wrapper' file systems invoke a nested mmap hook of an underlying file.
 *
 * Until all filesystems are converted to use .mmap_prepare(), we must be
 * conservative and continue to invoke these 'wrapper' filesystems using the
 * deprecated .mmap() hook.
 *
 * However we have a problem if the underlying file system possesses an
 * .mmap_prepare() hook, as we are in a different context when we invoke the
 * .mmap() hook, already having a VMA to deal with.
 *
 * compat_vma_mmap_prepare() is a compatibility function that takes VMA state,
 * establishes a struct vm_area_desc descriptor, passes it to the underlying
 * .mmap_prepare() hook and applies any changes performed by it.
 *
 * Once the conversion of filesystems is complete this function will no longer
 * be required and will be removed.
 *
 * Returns: 0 on success or an error code.
 */
int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma)
{
	struct vm_area_desc desc;
	int err;

	err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc));
	if (err)
		return err;
	set_vma_from_desc(vma, &desc);

	return 0;
}
EXPORT_SYMBOL(compat_vma_mmap_prepare);
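/*
 * Usage sketch (illustrative; example_lower_file() and the wrapper are
 * hypothetical): a 'wrapper' filesystem's legacy .mmap() hook can hand an
 * underlying file that already implements .mmap_prepare() to
 * compat_vma_mmap_prepare():
 */
#if 0
static int example_wrapper_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct file *lower = example_lower_file(file);	/* hypothetical */

	if (lower->f_op->mmap_prepare)
		return compat_vma_mmap_prepare(lower, vma);

	return call_mmap(lower, vma);
}
#endif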
// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/symlink.c - kernfs symlink implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/namei.h>

#include "kernfs-internal.h"

/**
 * kernfs_create_link - create a symlink
 * @parent: directory to create the symlink in
 * @name: name of the symlink
 * @target: target node for the symlink to point to
 *
 * Return: the created node on success, ERR_PTR() value on error.
 * Ownership of the link matches ownership of the target.
 */
struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
				       const char *name,
				       struct kernfs_node *target)
{
	struct kernfs_node *kn;
	int error;
	kuid_t uid = GLOBAL_ROOT_UID;
	kgid_t gid = GLOBAL_ROOT_GID;

	if (target->iattr) {
		uid = target->iattr->ia_uid;
		gid = target->iattr->ia_gid;
	}

	kn = kernfs_new_node(parent, name, S_IFLNK|0777, uid, gid, KERNFS_LINK);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	if (kernfs_ns_enabled(parent))
		kn->ns = target->ns;
	kn->symlink.target_kn = target;
	kernfs_get(target);	/* ref owned by symlink */

	error = kernfs_add_one(kn);
	if (!error)
		return kn;

	kernfs_put(kn);
	return ERR_PTR(error);
}

static int kernfs_get_target_path(struct kernfs_node *parent,
				  struct kernfs_node *target, char *path)
{
	struct kernfs_node *base, *kn;
	char *s = path;
	int len = 0;

	/* go up to the root, stop at the base */
	base = parent;
	while (kernfs_parent(base)) {
		kn = kernfs_parent(target);
		while (kernfs_parent(kn) && base != kn)
			kn = kernfs_parent(kn);

		if (base == kn)
			break;

		if ((s - path) + 3 >= PATH_MAX)
			return -ENAMETOOLONG;

		strcpy(s, "../");
		s += 3;
		base = kernfs_parent(base);
	}

	/* determine end of target string for reverse fillup */
	kn = target;
	while (kernfs_parent(kn) && kn != base) {
		len += strlen(kernfs_rcu_name(kn)) + 1;
		kn = kernfs_parent(kn);
	}

	/* check limits */
	if (len < 2)
		return -EINVAL;
	len--;
	if ((s - path) + len >= PATH_MAX)
		return -ENAMETOOLONG;

	/* reverse fillup of target string from target to base */
	kn = target;
	while (kernfs_parent(kn) && kn != base) {
		const char *name = kernfs_rcu_name(kn);
		int slen = strlen(name);

		len -= slen;
		memcpy(s + len, name, slen);
		if (len)
			s[--len] = '/';

		kn = kernfs_parent(kn);
	}

	return 0;
}
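/*
 * Worked example (illustrative): for a link sitting in parent "/foo/bar"
 * whose target is "/foo/baz/qux", the first loop above ascends from the
 * parent emitting "../" until it reaches the common ancestor "/foo", and
 * the reverse fillup then writes the remaining components back-to-front,
 * yielding "../baz/qux".
 */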
static int kernfs_getlink(struct inode *inode, char *path)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_node *parent;
	struct kernfs_node *target = kn->symlink.target_kn;
	struct kernfs_root *root = kernfs_root(kn);
	int error;

	down_read(&root->kernfs_rwsem);
	parent = kernfs_parent(kn);
	error = kernfs_get_target_path(parent, target, path);
	up_read(&root->kernfs_rwsem);

	return error;
}

static const char *kernfs_iop_get_link(struct dentry *dentry,
				       struct inode *inode,
				       struct delayed_call *done)
{
	char *body;
	int error;

	if (!dentry)
		return ERR_PTR(-ECHILD);
	body = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!body)
		return ERR_PTR(-ENOMEM);
	error = kernfs_getlink(inode, body);
	if (unlikely(error < 0)) {
		kfree(body);
		return ERR_PTR(error);
	}
	set_delayed_call(done, kfree_link, body);
	return body;
}

const struct inode_operations kernfs_symlink_iops = {
	.listxattr	= kernfs_iop_listxattr,
	.get_link	= kernfs_iop_get_link,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.permission	= kernfs_iop_permission,
};
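/*
 * Usage sketch (illustrative; the node names are hypothetical): sysfs-style
 * users create a link with kernfs_create_link() and check the result with
 * IS_ERR():
 */
#if 0
	struct kernfs_node *kn = kernfs_create_link(parent, "device", target);

	if (IS_ERR(kn))
		return PTR_ERR(kn);
#endif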
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "async_objs.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "checksum.h"
#include "debug.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "recovery.h"
#include "super-io.h"
#include "trace.h"

#include <linux/sched/mm.h>

static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn)
{
	bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn));
	prt_printf(out, " seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn));
	prt_str(out, "min: ");
	bch2_bpos_to_text(out, bn->min_key);
	prt_newline(out);
	prt_str(out, "max: ");
	bch2_bpos_to_text(out, bn->max_key);
}
" seq %llx %llu\n", bn->keys.seq, BTREE_NODE_SEQ(bn)); prt_str(out, "min: "); bch2_bpos_to_text(out, bn->min_key); prt_newline(out); prt_str(out, "max: "); bch2_bpos_to_text(out, bn->max_key); } void bch2_btree_node_io_unlock(struct btree *b) { EBUG_ON(!btree_node_write_in_flight(b)); clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } void bch2_btree_node_io_lock(struct btree *b) { wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } void __bch2_btree_node_wait_on_read(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, TASK_UNINTERRUPTIBLE); } void __bch2_btree_node_wait_on_write(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } void bch2_btree_node_wait_on_read(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, TASK_UNINTERRUPTIBLE); } void bch2_btree_node_wait_on_write(struct btree *b) { wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, TASK_UNINTERRUPTIBLE); } static void verify_no_dups(struct btree *b, struct bkey_packed *start, struct bkey_packed *end) { #ifdef CONFIG_BCACHEFS_DEBUG struct bkey_packed *k, *p; if (start == end) return; for (p = start, k = bkey_p_next(start); k != end; p = k, k = bkey_p_next(k)) { struct bkey l = bkey_unpack_key(b, p); struct bkey r = bkey_unpack_key(b, k); BUG_ON(bpos_ge(l.p, bkey_start_pos(&r))); } #endif } static void set_needs_whiteout(struct bset *i, int v) { struct bkey_packed *k; for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) k->needs_whiteout = v; } static void btree_bounce_free(struct bch_fs *c, size_t size, bool used_mempool, void *p) { if (used_mempool) mempool_free(p, &c->btree_bounce_pool); else kvfree(p); } static void *btree_bounce_alloc(struct bch_fs *c, size_t size, bool *used_mempool) { unsigned flags = memalloc_nofs_save(); void *p; BUG_ON(size > c->opts.btree_node_size); *used_mempool = false; p = kvmalloc(size, __GFP_NOWARN|GFP_NOWAIT); if (!p) { *used_mempool = true; p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); } memalloc_nofs_restore(flags); return p; } static void sort_bkey_ptrs(const struct btree *bt, struct bkey_packed **ptrs, unsigned nr) { unsigned n = nr, a = nr / 2, b, c, d; if (!a) return; /* Heap sort: see lib/sort.c: */ while (1) { if (a) a--; else if (--n) swap(ptrs[0], ptrs[n]); else break; for (b = a; c = 2 * b + 1, (d = c + 1) < n;) b = bch2_bkey_cmp_packed(bt, ptrs[c], ptrs[d]) >= 0 ? 
static void sort_bkey_ptrs(const struct btree *bt,
			   struct bkey_packed **ptrs, unsigned nr)
{
	unsigned n = nr, a = nr / 2, b, c, d;

	if (!a)
		return;

	/* Heap sort: see lib/sort.c: */
	while (1) {
		if (a)
			a--;
		else if (--n)
			swap(ptrs[0], ptrs[n]);
		else
			break;

		for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
			b = bch2_bkey_cmp_packed(bt, ptrs[c], ptrs[d]) >= 0 ? c : d;
		if (d == n)
			b = c;

		while (b != a &&
		       bch2_bkey_cmp_packed(bt, ptrs[a], ptrs[b]) >= 0)
			b = (b - 1) / 2;
		c = b;
		while (b != a) {
			b = (b - 1) / 2;
			swap(ptrs[b], ptrs[c]);
		}
	}
}

static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
{
	struct bkey_packed *new_whiteouts, **ptrs, **ptrs_end, *k;
	bool used_mempool = false;
	size_t bytes = b->whiteout_u64s * sizeof(u64);

	if (!b->whiteout_u64s)
		return;

	new_whiteouts = btree_bounce_alloc(c, bytes, &used_mempool);

	ptrs = ptrs_end = ((void *) new_whiteouts + bytes);

	for (k = unwritten_whiteouts_start(b);
	     k != unwritten_whiteouts_end(b);
	     k = bkey_p_next(k))
		*--ptrs = k;

	sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs);

	k = new_whiteouts;

	while (ptrs != ptrs_end) {
		bkey_p_copy(k, *ptrs);
		k = bkey_p_next(k);
		ptrs++;
	}

	verify_no_dups(b, new_whiteouts,
		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));

	memcpy_u64s(unwritten_whiteouts_start(b),
		    new_whiteouts, b->whiteout_u64s);

	btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
}

static bool should_compact_bset(struct btree *b, struct bset_tree *t,
				bool compacting, enum compact_mode mode)
{
	if (!bset_dead_u64s(b, t))
		return false;

	switch (mode) {
	case COMPACT_LAZY:
		return should_compact_bset_lazy(b, t) ||
			(compacting && !bset_written(b, bset(b, t)));
	case COMPACT_ALL:
		return true;
	default:
		BUG();
	}
}

static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
{
	bool ret = false;

	for_each_bset(b, t) {
		struct bset *i = bset(b, t);
		struct bkey_packed *k, *n, *out, *start, *end;
		struct btree_node_entry *src = NULL, *dst = NULL;

		if (t != b->set && !bset_written(b, i)) {
			src = container_of(i, struct btree_node_entry, keys);
			dst = max(write_block(b),
				  (void *) btree_bkey_last(b, t - 1));
		}

		if (src != dst)
			ret = true;

		if (!should_compact_bset(b, t, ret, mode)) {
			if (src != dst) {
				memmove(dst, src, sizeof(*src) +
					le16_to_cpu(src->keys.u64s) *
					sizeof(u64));
				i = &dst->keys;
				set_btree_bset(b, t, i);
			}
			continue;
		}

		start	= btree_bkey_first(b, t);
		end	= btree_bkey_last(b, t);

		if (src != dst) {
			memmove(dst, src, sizeof(*src));
			i = &dst->keys;
			set_btree_bset(b, t, i);
		}

		out = i->start;

		for (k = start; k != end; k = n) {
			n = bkey_p_next(k);

			if (!bkey_deleted(k)) {
				bkey_p_copy(out, k);
				out = bkey_p_next(out);
			} else {
				BUG_ON(k->needs_whiteout);
			}
		}

		i->u64s = cpu_to_le16((u64 *) out - i->_data);
		set_btree_bset_end(b, t);
		bch2_bset_set_no_aux_tree(b, t);
		ret = true;
	}

	bch2_verify_btree_nr_keys(b);

	bch2_btree_build_aux_trees(b);

	return ret;
}

bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
			    enum compact_mode mode)
{
	return bch2_drop_whiteouts(b, mode);
}
static void btree_node_sort(struct bch_fs *c, struct btree *b,
			    unsigned start_idx,
			    unsigned end_idx)
{
	struct btree_node *out;
	struct sort_iter_stack sort_iter;
	struct bset_tree *t;
	struct bset *start_bset = bset(b, &b->set[start_idx]);
	bool used_mempool = false;
	u64 start_time, seq = 0;
	unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1;
	bool sorting_entire_node = start_idx == 0 &&
		end_idx == b->nsets;

	sort_iter_stack_init(&sort_iter, b);

	for (t = b->set + start_idx;
	     t < b->set + end_idx;
	     t++) {
		u64s += le16_to_cpu(bset(b, t)->u64s);
		sort_iter_add(&sort_iter.iter,
			      btree_bkey_first(b, t),
			      btree_bkey_last(b, t));
	}

	bytes = sorting_entire_node
		? btree_buf_bytes(b)
		: __vstruct_bytes(struct btree_node, u64s);

	out = btree_bounce_alloc(c, bytes, &used_mempool);

	start_time = local_clock();

	u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter);

	out->keys.u64s = cpu_to_le16(u64s);

	BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes);

	if (sorting_entire_node)
		bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
				       start_time);

	/* Make sure we preserve bset journal_seq: */
	for (t = b->set + start_idx; t < b->set + end_idx; t++)
		seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq));
	start_bset->journal_seq = cpu_to_le64(seq);

	if (sorting_entire_node) {
		u64s = le16_to_cpu(out->keys.u64s);

		BUG_ON(bytes != btree_buf_bytes(b));

		/*
		 * Our temporary buffer is the same size as the btree node's
		 * buffer, we can just swap buffers instead of doing a big
		 * memcpy()
		 */
		*out = *b->data;
		out->keys.u64s = cpu_to_le16(u64s);
		swap(out, b->data);
		set_btree_bset(b, b->set, &b->data->keys);
	} else {
		start_bset->u64s = out->keys.u64s;
		memcpy_u64s(start_bset->start,
			    out->keys.start,
			    le16_to_cpu(out->keys.u64s));
	}

	for (i = start_idx + 1; i < end_idx; i++)
		b->nr.bset_u64s[start_idx] +=
			b->nr.bset_u64s[i];

	b->nsets -= shift;

	for (i = start_idx + 1; i < b->nsets; i++) {
		b->nr.bset_u64s[i]	= b->nr.bset_u64s[i + shift];
		b->set[i]		= b->set[i + shift];
	}

	for (i = b->nsets; i < MAX_BSETS; i++)
		b->nr.bset_u64s[i] = 0;

	set_btree_bset_end(b, &b->set[start_idx]);
	bch2_bset_set_no_aux_tree(b, &b->set[start_idx]);

	btree_bounce_free(c, bytes, used_mempool, out);

	bch2_verify_btree_nr_keys(b);
}

void bch2_btree_sort_into(struct bch_fs *c,
			  struct btree *dst,
			  struct btree *src)
{
	struct btree_nr_keys nr;
	struct btree_node_iter src_iter;
	u64 start_time = local_clock();

	BUG_ON(dst->nsets != 1);

	bch2_bset_set_no_aux_tree(dst, dst->set);

	bch2_btree_node_iter_init_from_start(&src_iter, src);

	nr = bch2_sort_repack(btree_bset_first(dst),
			      src, &src_iter,
			      &dst->format,
			      true);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
			       start_time);

	set_btree_bset_end(dst, dst->set);

	dst->nr.live_u64s	+= nr.live_u64s;
	dst->nr.bset_u64s[0]	+= nr.bset_u64s[0];
	dst->nr.packed_keys	+= nr.packed_keys;
	dst->nr.unpacked_keys	+= nr.unpacked_keys;

	bch2_verify_btree_nr_keys(dst);
}

/*
 * We're about to add another bset to the btree node, so if there's currently
 * too many bsets - sort some of them together:
 */
static bool btree_node_compact(struct bch_fs *c, struct btree *b)
{
	unsigned unwritten_idx;
	bool ret = false;

	for (unwritten_idx = 0;
	     unwritten_idx < b->nsets;
	     unwritten_idx++)
		if (!bset_written(b, bset(b, &b->set[unwritten_idx])))
			break;

	if (b->nsets - unwritten_idx > 1) {
		btree_node_sort(c, b, unwritten_idx, b->nsets);
		ret = true;
	}

	if (unwritten_idx > 1) {
		btree_node_sort(c, b, 0, unwritten_idx);
		ret = true;
	}

	return ret;
}

void bch2_btree_build_aux_trees(struct btree *b)
{
	for_each_bset(b, t)
		bch2_bset_build_aux_tree(b, t,
				!bset_written(b, bset(b, t)) &&
				t == bset_tree_last(b));
}

/*
 * If we have MAX_BSETS (3) bsets, should we sort them all down to just one?
 *
 * The first bset is going to be of similar order to the size of the node, the
 * last bset is bounded by btree_write_set_buffer(), which is set to keep the
 * memmove on insert from being too expensive: the middle bset should, ideally,
 * be the geometric mean of the first and the last.
 *
 * Returns true if the middle bset is greater than that geometric mean:
 */
static inline bool should_compact_all(struct bch_fs *c, struct btree *b)
{
	unsigned mid_u64s_bits = (ilog2(btree_max_u64s(c)) + BTREE_WRITE_SET_U64s_BITS) / 2;

	return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits;
}
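/*
 * Worked example for should_compact_all() (purely illustrative numbers, not
 * the actual constants): if ilog2(btree_max_u64s(c)) were 15 and
 * BTREE_WRITE_SET_U64s_BITS were 9, the geometric mean works out to
 * 1U << ((15 + 9) / 2) = 1U << 12 = 4096 u64s (32 KiB), so a middle bset
 * larger than that would trigger a full compaction.
 */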
/*
 * bch2_btree_init_next - initialize a new (unwritten) bset that can then be
 * inserted into
 *
 * Safe to call if there already is an unwritten bset - will only add a new
 * bset if @b doesn't already have one.
 *
 * Returns true if we sorted (i.e. invalidated iterators).
 */
void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
{
	struct bch_fs *c = trans->c;
	struct btree_node_entry *bne;
	bool reinit_iter = false;

	EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]);
	BUG_ON(bset_written(b, bset(b, &b->set[1])));
	BUG_ON(btree_node_just_written(b));

	if (b->nsets == MAX_BSETS &&
	    !btree_node_write_in_flight(b) &&
	    should_compact_all(c, b)) {
		bch2_btree_node_write_trans(trans, b, SIX_LOCK_write,
					    BTREE_WRITE_init_next_bset);
		reinit_iter = true;
	}

	if (b->nsets == MAX_BSETS &&
	    btree_node_compact(c, b))
		reinit_iter = true;

	BUG_ON(b->nsets >= MAX_BSETS);

	bne = want_new_bset(c, b);
	if (bne)
		bch2_bset_init_next(b, bne);

	bch2_btree_build_aux_trees(b);

	if (reinit_iter)
		bch2_trans_node_reinit_iter(trans, b);
}

static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
			  struct bch_dev *ca,
			  bool print_pos,
			  struct btree *b, struct bset *i, struct bkey_packed *k,
			  unsigned offset, int rw)
{
	if (print_pos) {
		prt_str(out, rw == READ
			? "error validating btree node "
			: "corrupt btree node before write ");
		prt_printf(out, "at btree ");
		bch2_btree_pos_to_text(out, c, b);
		prt_newline(out);
	}

	if (ca)
		prt_printf(out, "%s ", ca->name);

	prt_printf(out, "node offset %u/%u",
		   b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
	if (i)
		prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
	if (k)
		prt_printf(out, " bset byte offset %lu",
			   (unsigned long)(void *)k -
			   ((unsigned long)(void *)i & ~511UL));
	prt_str(out, ": ");
}
bch_err_throw(c, fsck_fix) : ret; bool have_retry = false; int ret2; if (ca) { bch2_mark_btree_validate_failure(failed, ca->dev_idx); struct extent_ptr_decoded pick; have_retry = !bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), failed, &pick, -1); } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = bch_err_throw(c, btree_node_read_err_fixable); if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = bch_err_throw(c, btree_node_read_err_bad_node); bch2_sb_error_count(c, err_type); bool print_deferred = err_msg && rw == READ && !(test_bit(BCH_FS_in_fsck, &c->flags) && c->opts.fix_errors == FSCK_FIX_ask); struct printbuf out = PRINTBUF; bch2_log_msg_start(c, &out); if (!print_deferred) err_msg = &out; btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw); va_list args; va_start(args, fmt); prt_vprintf(err_msg, fmt, args); va_end(args); if (print_deferred) { prt_newline(err_msg); switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type); if (!bch2_err_matches(ret2, BCH_ERR_fsck_fix) && !bch2_err_matches(ret2, BCH_ERR_fsck_ignore)) { ret = ret2; goto fsck_err; } if (!have_retry) ret = bch_err_throw(c, fsck_fix); goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); ret = __bch2_topology_error(c, &out); break; } goto out; } if (rw == WRITE) { prt_str(&out, ", "); ret = __bch2_inconsistent_error(c, &out) ? -BCH_ERR_fsck_errors_not_fixed : 0; goto print; } switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf); if (!bch2_err_matches(ret2, BCH_ERR_fsck_fix) && !bch2_err_matches(ret2, BCH_ERR_fsck_ignore)) { ret = ret2; goto fsck_err; } if (!have_retry) ret = bch_err_throw(c, fsck_fix); goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); ret = __bch2_topology_error(c, &out); break; } print: bch2_print_str(c, KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); return ret; } #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ int _ret = __btree_err(type, c, ca, b, i, k, write, \ BCH_FSCK_ERR_##_err_type, \ failed, err_msg, \ msg, ##__VA_ARGS__); \ \ if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix)) { \ ret = _ret; \ goto fsck_err; \ } \ \ true; \ }) #define btree_err_on(cond, ...) ((cond) ? 
btree_err(__VA_ARGS__) : false) /* * When btree topology repair changes the start or end of a node, that might * mean we have to drop keys that are no longer inside the node: */ __cold void bch2_btree_node_drop_keys_outside_node(struct btree *b) { for_each_bset(b, t) { struct bset *i = bset(b, t); struct bkey_packed *k; for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0) break; if (k != i->start) { unsigned shift = (u64 *) k - (u64 *) i->start; memmove_u64s_down(i->start, k, (u64 *) vstruct_end(i) - (u64 *) k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); set_btree_bset_end(b, t); } for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0) break; if (k != vstruct_last(i)) { i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); set_btree_bset_end(b, t); } } /* * Always rebuild search trees: eytzinger search tree nodes directly * depend on the values of min/max key: */ bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); b->nr = bch2_btree_node_count_keys(b); struct bkey_s_c k; struct bkey unpacked; struct btree_node_iter iter; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); } } static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, unsigned offset, int write, struct bch_io_failures *failed, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; int ret = 0; btree_err_on(!bch2_version_compatible(version), -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, NULL, btree_node_unsupported_version, "unsupported bset version %u.%u", BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes && btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { if (bch2_version_compatible(version)) { mutex_lock(&c->sb_lock); c->disk_sb.sb->version_min = cpu_to_le16(version); bch2_write_super(c); mutex_unlock(&c->sb_lock); } else { /* We have no idea what's going on: */ i->version = cpu_to_le16(c->sb.version); } } if (btree_err_on(BCH_VERSION_MAJOR(version) > BCH_VERSION_MAJOR(c->sb.version), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { mutex_lock(&c->sb_lock); c->disk_sb.sb->version = cpu_to_le16(version); bch2_write_super(c); mutex_unlock(&c->sb_lock); } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), -BCH_ERR_btree_node_read_err_incompatible, c, ca, b, i, NULL, btree_node_unsupported_version, "BSET_SEPARATE_WHITEOUTS no longer supported"); btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_empty, "empty bset"); btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_wrong_sector_offset, "bset at wrong sector offset"); if (!offset) { struct btree_node *bn = container_of(i, struct btree_node, keys); /* These indicate that we read the wrong btree node: */ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; /* XXX endianness */ 
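/*
 * bp->seq is the cookie stamped into the node's header when it was
 * created; if it doesn't match the seq in the parent's btree_ptr_v2
 * key, the sectors we just read belong to some other (stale or
 * reused) btree node, so retry from another replica:
 */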
btree_err_on(bp->seq != bn->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, bset_bad_seq, "incorrect sequence number (wrong btree node)"); } btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_btree, "incorrect btree id"); btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_level, "incorrect level"); if (!write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; if (BTREE_PTR_RANGE_UPDATED(bp)) { b->data->min_key = bp->min_key; b->data->max_key = b->key.k.p; } btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_min_key, "incorrect min_key: got %s should be %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), (printbuf_reset(&buf2), bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); } btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, i, NULL, btree_node_bad_max_key, "incorrect max key %s", (printbuf_reset(&buf1), bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); if (write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), -BCH_ERR_btree_node_read_err_bad_node, c, ca, b, i, NULL, btree_node_bad_format, "invalid bkey format: %s\n%s", buf1.buf, (printbuf_reset(&buf2), bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); printbuf_reset(&buf1); compat_bformat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &bn->format); } fsck_err: printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; } static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b, struct bkey_s_c k, enum bch_validate_flags flags) { return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = flags }); } static int bset_key_validate(struct bch_fs *c, struct btree *b, struct bkey_s_c k, bool updated_range, enum bch_validate_flags flags) { struct bkey_validate_context from = (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = flags, }; return __bch2_bkey_validate(c, k, from) ?: (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?: (flags & BCH_VALIDATE_write ? 
btree_node_bkey_val_validate(c, b, k, flags) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, struct bset *i, struct bkey_packed *k) { if (bkey_p_next(k) > vstruct_last(i)) return false; if (k->format > KEY_FORMAT_CURRENT) return false; if (!bkeyp_u64s_valid(&b->format, k)) return false; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); return !__bch2_bkey_validate(c, u.s_c, (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level, .btree = b->c.btree_id, .flags = BCH_VALIDATE_silent }); } static inline int btree_node_read_bkey_cmp(const struct btree *b, const struct bkey_packed *l, const struct bkey_packed *r) { return bch2_bkey_cmp_packed(b, l, r) ?: (int) bkey_deleted(r) - (int) bkey_deleted(l); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, struct bch_io_failures *failed, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; struct printbuf buf = PRINTBUF; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; for (k = i->start; k != vstruct_last(i);) { struct bkey_s u; struct bkey tmp; unsigned next_good_key; if (btree_err_on(bkey_p_next(k) > vstruct_last(i), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_past_bset_end, "key extends past end of bset")) { i->u64s = cpu_to_le16((u64 *) k - i->_data); break; } if (btree_err_on(k->format > KEY_FORMAT_CURRENT, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_bad_format, "invalid bkey format %u", k->format)) goto drop_this_key; if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_bad_u64s, "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; if (!write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &b->format, k); u = __bkey_disassemble(b, k, &tmp); ret = bset_key_validate(c, b, u.s_c, updated_range, write); if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; if (ret) goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, &b->format, k); if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) { struct bkey up = bkey_unpack_key(b, prev); printbuf_reset(&buf); prt_printf(&buf, "keys out of order: "); bch2_bkey_to_text(&buf, &up); prt_printf(&buf, " > "); bch2_bkey_to_text(&buf, u.k); if (btree_err(-BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, k, btree_node_bkey_out_of_order, "%s", buf.buf)) goto drop_this_key; } prev = k; k = bkey_p_next(k); continue; drop_this_key: next_good_key = k->u64s; if (!next_good_key || (BSET_BIG_ENDIAN(i) == CPU_BIG_ENDIAN && version >= bcachefs_metadata_version_snapshot)) { /* * only do scanning if bch2_bkey_compat() has nothing to * do */ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { for (next_good_key = 1; next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; next_good_key++) if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) goto got_good_key; } /* * didn't find a good key, have to truncate the rest of * the bset */ next_good_key = (u64 *) vstruct_last(i) - (u64 *) k; } got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); 
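/*
 * The bad key has been memmove'd out of the bset: flag the node so
 * it gets rewritten with the repaired contents instead of leaving
 * the bad key on disk, and record that the rewrite is due to an
 * error:
 */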
set_btree_node_need_rewrite(b); set_btree_node_need_rewrite_error(b); } fsck_err: printbuf_exit(&buf); return ret; } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bch_io_failures *failed, struct printbuf *err_msg) { struct btree_node_entry *bne; struct sort_iter *iter; struct btree_node *sorted; struct bkey_packed *k; struct bset *i; bool used_mempool, blacklisted; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; int ret = 0, write = READ; u64 start_time = local_clock(); b->version_ondisk = U16_MAX; /* We might get called multiple times on read retry: */ b->written = 0; iter = mempool_alloc(&c->fill_iter, GFP_NOFS); sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); if (bch2_meta_read_fault("btree")) btree_err(-BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_fault_injected, "dynamic fault"); btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_magic, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(b->data->magic)); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bch_btree_ptr_v2 *bp = &bkey_i_to_btree_ptr_v2(&b->key)->v; bch2_bpos_to_text(&buf, b->data->min_key); prt_str(&buf, "-"); bch2_bpos_to_text(&buf, b->data->max_key); btree_err_on(b->data->keys.seq != bp->seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_seq, "got wrong btree node: got\n%s", (printbuf_reset(&buf), bch2_btree_node_header_to_text(&buf, b->data), buf.buf)); } else { btree_err_on(!b->data->keys.seq, -BCH_ERR_btree_node_read_err_must_retry, c, ca, b, NULL, NULL, btree_node_bad_seq, "bad btree header: seq 0\n%s", (printbuf_reset(&buf), bch2_btree_node_header_to_text(&buf, b->data), buf.buf)); } while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors; bool first = !b->written; if (first) { bne = NULL; i = &b->data->keys; } else { bne = write_block(b); i = &bne->keys; if (i->seq != b->data->keys.seq) break; } struct nonce nonce = btree_nonce(i, b->written << 9); bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); btree_err_on(!good_csum_type, bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) ? 
-BCH_ERR_btree_node_read_err_must_retry : -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_unknown_csum, "unknown checksum type %llu", BSET_CSUM_TYPE(i)); if (first) { sectors = vstruct_sectors(b->data, c->block_bits); if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_past_end_of_btree_node, "bset past end of btree node (offset %u len %u but written %zu)", b->written, sectors, ptr_written ?: btree_sectors(c))) i->u64s = 0; if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); bool csum_bad = bch2_crc_cmp(b->data->csum, csum); if (csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), -BCH_ERR_btree_node_read_err_incompatible, c, NULL, b, NULL, NULL, btree_node_unsupported_version, "btree node does not have NEW_EXTENT_OVERWRITE set"); } else { sectors = vstruct_sectors(bne, c->block_bits); if (btree_err_on(b->written + sectors > (ptr_written ?: btree_sectors(c)), -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_past_end_of_btree_node, "bset past end of btree node (offset %u len %u but written %zu)", b->written, sectors, ptr_written ?: btree_sectors(c))) i->u64s = 0; if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); bool csum_bad = bch2_crc_cmp(bne->csum, csum); if (ca && csum_bad) bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); btree_err_on(csum_bad, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, i, NULL, bset_bad_csum, "%s", (printbuf_reset(&buf), bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), buf.buf)); ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; } } b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); ret = validate_bset(c, ca, b, i, b->written, READ, failed, err_msg); if (ret) goto fsck_err; if (!b->written) btree_node_set_format(b, b->data->format); ret = validate_bset_keys(c, b, i, READ, failed, err_msg); if (ret) goto fsck_err; SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); blacklisted = bch2_journal_seq_is_blacklisted(c, le64_to_cpu(i->journal_seq), true); btree_err_on(blacklisted && first, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, bset_blacklisted_journal_seq, "first btree node bset has blacklisted journal seq (%llu)", le64_to_cpu(i->journal_seq)); btree_err_on(blacklisted && ptr_written, -BCH_ERR_btree_node_read_err_fixable, c, ca, b, i, NULL, first_bset_blacklisted_journal_seq, "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); b->written = min(b->written + sectors, btree_sectors(c)); if (blacklisted && !first) continue; sort_iter_add(iter, vstruct_idx(i, 0), vstruct_last(i)); max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq)); } if (ptr_written) { btree_err_on(b->written < ptr_written, -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, NULL, btree_node_data_missing, "btree node 
data missing: expected %u sectors, found %u", ptr_written, b->written); } else { for (bne = write_block(b); bset_byte_offset(b, bne) < btree_buf_bytes(b); bne = (void *) bne + block_bytes(c)) btree_err_on(bne->keys.seq == b->data->keys.seq && !bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), true), -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset"); } sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); sorted->keys.u64s = 0; b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter); memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0, btree_buf_bytes(b) - sizeof(struct btree_node) - b->nr.live_u64s * sizeof(u64)); b->data->keys.u64s = sorted->keys.u64s; *sorted = *b->data; swap(sorted, b->data); set_btree_bset(b, b->set, &b->data->keys); b->nsets = 1; b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); BUG_ON(b->nr.live_u64s != le16_to_cpu(b->data->keys.u64s)); btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); if (updated_range) bch2_btree_node_drop_keys_outside_node(b); i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); ret = btree_node_bkey_val_validate(c, b, u.s_c, READ); if (ret == -BCH_ERR_fsck_delete_bkey || (static_branch_unlikely(&bch2_inject_invalid_keys) && !bversion_cmp(u.k->bversion, MAX_VERSION))) { btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); set_btree_node_need_rewrite(b); set_btree_node_need_rewrite_error(b); continue; } if (ret) goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); bp.v->mem_ptr = 0; } k = bkey_p_next(k); } bch2_bset_build_aux_tree(b, b->set, false); set_needs_whiteout(btree_bset_first(b), true); btree_node_reset_sib_u64s(b); /* * XXX: * * We deadlock if too many btree updates require node rewrites while * we're still in journal replay. * * This is because btree node rewrites generate more updates for the * interior updates (alloc, backpointers), and if those updates touch * new nodes and generate more rewrites - well, you see the problem. * * The biggest cause is that we don't use the btree write buffer (for * the backpointer updates) - this needs some real thought on locking in * order to fix. * * The problem with this workaround (not doing the rewrite for degraded * nodes in journal replay) is that those degraded nodes persist, and we * don't want that (this is a real bug when a btree node write completes * with fewer replicas than we wanted and leaves a degraded node due to * device _removal_, i.e. the device went away mid write). * * It's less of a bug here, but still a problem because we don't yet * have a way of tracking degraded data - we need another index (all * extents/btree nodes, by replicas entry) in order to fix properly * (re-replicate degraded data at the earliest possible time).
*/ if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { scoped_guard(rcu) bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { set_btree_node_need_rewrite(b); set_btree_node_need_rewrite_degraded(b); } } } if (!ptr_written) { set_btree_node_need_rewrite(b); set_btree_node_need_rewrite_ptr_written_zero(b); } fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return ret; } static void btree_node_read_work(struct work_struct *work) { struct btree_read_bio *rb = container_of(work, struct btree_read_bio, work); struct bch_fs *c = rb->c; struct bch_dev *ca = rb->have_ioref ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; int ret = 0; struct printbuf buf = PRINTBUF; bch2_log_msg_start(c, &buf); prt_printf(&buf, "btree node read error at btree "); bch2_btree_pos_to_text(&buf, c, b); prt_newline(&buf); goto start; while (1) { ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); if (ret) { set_btree_node_read_error(b); break; } ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; rb->start_time = local_clock(); bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = rb->pick.ptr.offset; bio->bi_iter.bi_size = btree_buf_bytes(b); if (rb->have_ioref) { bio_set_dev(bio, ca->disk_sb.bdev); submit_bio_wait(bio); } else { bio->bi_status = BLK_STS_REMOVED; } bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); start: if (rb->have_ioref) enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = false; if (bio->bi_status) { bch2_mark_io_failure(&failed, &rb->pick, false); continue; } ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); if (ret == -BCH_ERR_btree_node_read_err_want_retry || ret == -BCH_ERR_btree_node_read_err_must_retry) continue; if (ret) set_btree_node_read_error(b); break; } bch2_io_failures_to_text(&buf, c, &failed); if (btree_node_read_error(b)) bch2_btree_lost_data(c, &buf, b->c.btree_id); /* * only print retry success if we read from a replica with no errors */ if (btree_node_read_error(b)) prt_printf(&buf, "ret %s", bch2_err_str(ret)); else if (failed.nr) { if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) prt_printf(&buf, "retry success"); else prt_printf(&buf, "repair success"); } if ((failed.nr || btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { prt_printf(&buf, " (rewriting node)"); bch2_btree_node_rewrite_async(c, b); } prt_newline(&buf); if (failed.nr) bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } static void btree_node_read_endio(struct bio *bio) { struct btree_read_bio *rb = container_of(bio, struct btree_read_bio, bio); struct bch_fs *c = rb->c; struct bch_dev *ca = rb->have_ioref ? 
bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); queue_work(c->btree_read_complete_wq, &rb->work); } void bch2_btree_read_bio_to_text(struct printbuf *out, struct btree_read_bio *rbio) { bch2_bio_to_text(out, &rbio->bio); } struct btree_node_read_all { struct closure cl; struct bch_fs *c; struct btree *b; unsigned nr; void *buf[BCH_REPLICAS_MAX]; struct bio *bio[BCH_REPLICAS_MAX]; blk_status_t err[BCH_REPLICAS_MAX]; }; static unsigned btree_node_sectors_written(struct bch_fs *c, void *data) { struct btree_node *bn = data; struct btree_node_entry *bne; unsigned offset = 0; if (le64_to_cpu(bn->magic) != bset_magic(c)) return 0; while (offset < btree_sectors(c)) { if (!offset) { offset += vstruct_sectors(bn, c->block_bits); } else { bne = data + (offset << 9); if (bne->keys.seq != bn->keys.seq) break; offset += vstruct_sectors(bne, c->block_bits); } } return offset; } static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void *data) { struct btree_node *bn = data; struct btree_node_entry *bne; if (!offset) return false; while (offset < btree_sectors(c)) { bne = data + (offset << 9); if (bne->keys.seq == bn->keys.seq) return true; offset++; } return false; } static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) { closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; bool _saw_error = false, *saw_error = &_saw_error; struct printbuf *err_msg = NULL; struct bch_io_failures *failed = NULL; for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; if (ra->err[i]) continue; if (le64_to_cpu(bn->magic) != bset_magic(c) || (seq && seq != bn->keys.seq)) continue; if (best < 0) { best = i; written = btree_node_sectors_written(c, bn); continue; } written2 = btree_node_sectors_written(c, ra->buf[i]); if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_replicas_sectors_written_mismatch, "btree node sectors written mismatch: %u != %u", written, written2) || btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_bset_after_end, "found bset signature after last bset") || btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, NULL, NULL, btree_node_replicas_data_mismatch, "btree node replicas content mismatch")) dump_bset_maps = true; if (written2 > written) { written = written2; best = i; } } fsck_err: if (dump_bset_maps) { for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; struct btree_node_entry *bne = NULL; unsigned offset = 0, sectors; bool gap = false; if (ra->err[i]) continue; printbuf_reset(&buf); while (offset < btree_sectors(c)) { if (!offset) { sectors = vstruct_sectors(bn, c->block_bits); } else { bne = ra->buf[i] + (offset << 9); if (bne->keys.seq != bn->keys.seq) break; sectors = vstruct_sectors(bne, c->block_bits); } prt_printf(&buf, " %u-%u", offset, offset + sectors); if (bne && bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) prt_printf(&buf, "*"); offset += sectors; } while (offset < btree_sectors(c)) { bne = ra->buf[i] + (offset << 9); if
(bne->keys.seq == bn->keys.seq) { if (!gap) prt_printf(&buf, " GAP"); gap = true; sectors = vstruct_sectors(bne, c->block_bits); prt_printf(&buf, " %u-%u", offset, offset + sectors); if (bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) prt_printf(&buf, "*"); } offset++; } bch_err(c, "replica %u:%s", i, buf.buf); } } if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL); } else { ret = -1; } if (ret) { set_btree_node_read_error(b); struct printbuf buf = PRINTBUF; bch2_btree_lost_data(c, &buf, b->c.btree_id); if (buf.pos) bch_err(c, "%s", buf.buf); printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); for (i = 0; i < ra->nr; i++) { mempool_free(ra->buf[i], &c->btree_bounce_pool); bio_put(ra->bio[i]); } closure_debug_destroy(&ra->cl); kfree(ra); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } static void btree_node_read_all_replicas_endio(struct bio *bio) { struct btree_read_bio *rb = container_of(bio, struct btree_read_bio, bio); struct bch_fs *c = rb->c; struct btree_node_read_all *ra = rb->ra; if (rb->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); bch2_latency_acct(ca, rb->start_time, READ); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read_all_replicas); } ra->err[rb->idx] = bio->bi_status; closure_put(&ra->cl); } /* * XXX This allocates multiple times from the same mempools, and can deadlock * under sufficient memory pressure (but is only a debug path) */ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync) { struct bkey_s_c k = bkey_i_to_s_c(&b->key); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded pick; struct btree_node_read_all *ra; unsigned i; ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) return bch_err_throw(c, ENOMEM_btree_node_read_all_replicas); closure_init(&ra->cl, NULL); ra->c = c; ra->b = b; ra->nr = bch2_bkey_nr_ptrs(k); for (i = 0; i < ra->nr; i++) { ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); ra->bio[i] = bio_alloc_bioset(NULL, buf_pages(ra->buf[i], btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); } i = 0; bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) { struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read_all_replicas); struct btree_read_bio *rb = container_of(ra->bio[i], struct btree_read_bio, bio); rb->c = c; rb->b = b; rb->ra = ra; rb->start_time = local_clock(); rb->have_ioref = ca != NULL; rb->idx = i; rb->pick = pick; rb->bio.bi_iter.bi_sector = pick.ptr.offset; rb->bio.bi_end_io = btree_node_read_all_replicas_endio; bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b)); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(&rb->bio)); bio_set_dev(&rb->bio, ca->disk_sb.bdev); closure_get(&ra->cl); submit_bio(&rb->bio); } else { ra->err[i] = BLK_STS_REMOVED; } i++; } if (sync) { closure_sync(&ra->cl); btree_node_read_all_replicas_done(&ra->cl.work); } else { continue_at(&ra->cl, btree_node_read_all_replicas_done, c->btree_read_complete_wq); } return 0; } void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bool sync) { struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; struct btree_read_bio *rb; struct bch_dev *ca; struct bio *bio; int ret; trace_and_count(c, 
btree_node_read, trans, b); if (static_branch_unlikely(&bch2_verify_all_btree_replicas) && !btree_node_read_all_replicas(c, b, sync)) return; ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick, -1); if (ret <= 0) { bool ratelimit = true; struct printbuf buf = PRINTBUF; bch2_log_msg_start(c, &buf); prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); prt_newline(&buf); bch2_btree_lost_data(c, &buf, b->c.btree_id); if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && bch2_fs_emergency_read_only2(c, &buf)) ratelimit = false; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); if (!ratelimit || __ratelimit(&rs)) bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); set_btree_node_read_error(b); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); return; } ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); bio = bio_alloc_bioset(NULL, buf_pages(b->data, btree_buf_bytes(b)), REQ_OP_READ|REQ_SYNC|REQ_META, GFP_NOFS, &c->btree_bio); rb = container_of(bio, struct btree_read_bio, bio); rb->c = c; rb->b = b; rb->ra = NULL; rb->start_time = local_clock(); rb->have_ioref = ca != NULL; rb->pick = pick; INIT_WORK(&rb->work, btree_node_read_work); bio->bi_iter.bi_sector = pick.ptr.offset; bio->bi_end_io = btree_node_read_endio; bch2_bio_map(bio, b->data, btree_buf_bytes(b)); async_object_list_add(c, btree_read_bio, rb, &rb->list_idx); if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(bio)); bio_set_dev(bio, ca->disk_sb.bdev); if (sync) { submit_bio_wait(bio); bch2_latency_acct(ca, rb->start_time, READ); btree_node_read_work(&rb->work); } else { submit_bio(bio); } } else { bio->bi_status = BLK_STS_REMOVED; if (sync) btree_node_read_work(&rb->work); else queue_work(c->btree_read_complete_wq, &rb->work); } } static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, const struct bkey_i *k, unsigned level) { struct bch_fs *c = trans->c; struct closure cl; struct btree *b; int ret; closure_init_stack(&cl); do { ret = bch2_btree_cache_cannibalize_lock(trans, &cl); closure_sync(&cl); } while (ret); b = bch2_btree_node_mem_alloc(trans, level != 0); bch2_btree_cache_cannibalize_unlock(trans); BUG_ON(IS_ERR(b)); bkey_copy(&b->key, k); BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); set_btree_node_read_in_flight(b); /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); mutex_unlock(&c->btree_cache.lock); ret = bch_err_throw(c, btree_node_read_error); goto err; } bch2_btree_set_root_for_read(c, b); err: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); return ret; } int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } struct btree_node_scrub { struct bch_fs *c; struct bch_dev *ca; void *buf; bool used_mempool; unsigned written; enum btree_id btree; unsigned level; struct bkey_buf key; __le64 seq; struct work_struct work; struct bio bio; }; static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written, struct printbuf *err) { unsigned 
written = 0; if (le64_to_cpu(data->magic) != bset_magic(c)) { prt_printf(err, "bad magic: want %llx, got %llx", bset_magic(c), le64_to_cpu(data->magic)); return false; } while (written < (ptr_written ?: btree_sectors(c))) { struct btree_node_entry *bne; struct bset *i; bool first = !written; if (first) { bne = NULL; i = &data->keys; } else { bne = (void *) data + (written << 9); i = &bne->keys; if (!ptr_written && i->seq != data->keys.seq) break; } struct nonce nonce = btree_nonce(i, written << 9); bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)); if (first) { if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data); if (bch2_crc_cmp(data->csum, csum)) { bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum); return false; } } written += vstruct_sectors(data, c->block_bits); } else { if (good_csum_type) { struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); if (bch2_crc_cmp(bne->csum, csum)) { bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum); return false; } } written += vstruct_sectors(bne, c->block_bits); } } return true; } static void btree_node_scrub_work(struct work_struct *work) { struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work); struct bch_fs *c = scrub->c; struct printbuf err = PRINTBUF; __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level, bkey_i_to_s_c(scrub->key.k)); prt_newline(&err); if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) { int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, scrub->btree, scrub->level - 1, scrub->key.k, 0)); if (!bch2_err_matches(ret, ENOENT) && !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); } printbuf_exit(&err); bch2_bkey_buf_exit(&scrub->key, c); btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); kfree(scrub); enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); } static void btree_node_scrub_endio(struct bio *bio) { struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio); queue_work(scrub->c->btree_read_complete_wq, &scrub->work); } int bch2_btree_node_scrub(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c k, unsigned dev) { if (k.k->type != KEY_TYPE_btree_ptr_v2) return 0; struct bch_fs *c = trans->c; if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub)) return bch_err_throw(c, erofs_no_writes); struct extent_ptr_decoded pick; int ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev); if (ret <= 0) goto err; struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_scrub); if (!ca) { ret = bch_err_throw(c, device_offline); goto err; } bool used_mempool = false; void *buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool); unsigned vecs = buf_pages(buf, c->opts.btree_node_size); struct btree_node_scrub *scrub = kzalloc(sizeof(*scrub) + sizeof(struct bio_vec) * vecs, GFP_KERNEL); if (!scrub) { ret = -ENOMEM; goto err_free; } scrub->c = c; scrub->ca = ca; scrub->buf = buf; scrub->used_mempool = used_mempool; scrub->written = btree_ptr_sectors_written(k); scrub->btree = btree; scrub->level = level; bch2_bkey_buf_init(&scrub->key); bch2_bkey_buf_reassemble(&scrub->key, c, k); scrub->seq = bkey_s_c_to_btree_ptr_v2(k).v->seq; INIT_WORK(&scrub->work, btree_node_scrub_work); bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs,
vecs, REQ_OP_READ); bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size); scrub->bio.bi_iter.bi_sector = pick.ptr.offset; scrub->bio.bi_end_io = btree_node_scrub_endio; submit_bio(&scrub->bio); return 0; err_free: btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); err: enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub); return ret; } static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, struct btree_write *w) { unsigned long old, new; old = READ_ONCE(b->will_make_reachable); do { new = old; if (!(old & 1)) break; new &= ~1UL; } while (!try_cmpxchg(&b->will_make_reachable, &old, new)); if (old & 1) closure_put(&((struct btree_update *) new)->cl); bch2_journal_pin_drop(&c->journal, &w->journal); } static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) { struct btree_write *w = btree_prev_write(b); unsigned long old, new; unsigned type = 0; bch2_btree_complete_write(c, b, w); if (start_time) bch2_time_stats_update(&c->times[BCH_TIME_btree_node_write], start_time); old = READ_ONCE(b->flags); do { new = old; if ((old & (1U << BTREE_NODE_dirty)) && (old & (1U << BTREE_NODE_need_write)) && !(old & (1U << BTREE_NODE_never_write)) && !(old & (1U << BTREE_NODE_write_blocked)) && !(old & (1U << BTREE_NODE_will_make_reachable))) { new &= ~(1U << BTREE_NODE_dirty); new &= ~(1U << BTREE_NODE_need_write); new |= (1U << BTREE_NODE_write_in_flight); new |= (1U << BTREE_NODE_write_in_flight_inner); new |= (1U << BTREE_NODE_just_written); new ^= (1U << BTREE_NODE_write_idx); type = new & BTREE_WRITE_TYPE_MASK; new &= ~BTREE_WRITE_TYPE_MASK; } else { new &= ~(1U << BTREE_NODE_write_in_flight); new &= ~(1U << BTREE_NODE_write_in_flight_inner); } } while (!try_cmpxchg(&b->flags, &old, new)); if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); else { smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) { struct btree_trans *trans = bch2_trans_get(c); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); /* we don't need transaction context anymore after we got the lock. 
*/ bch2_trans_put(trans); __btree_node_write_done(c, b, start_time); six_unlock_read(&b->c.lock); } static void btree_node_write_work(struct work_struct *work) { struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work); struct bch_fs *c = wbio->wbio.c; struct btree *b = wbio->wbio.bio.bi_private; u64 start_time = wbio->start_time; int ret = 0; btree_bounce_free(c, wbio->data_bytes, wbio->wbio.used_mempool, wbio->data); bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { ret = bch_err_throw(c, btree_node_write_all_failed); goto err; } if (wbio->wbio.first_btree_write) { if (wbio->wbio.failed.nr) { } } else { ret = bch2_trans_do(c, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_check_rw, !wbio->wbio.failed.nr)); if (ret) goto err; } out: async_object_list_del(c, btree_write_bio, wbio->list_idx); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b, start_time); return; err: set_btree_node_noevict(b); if (!bch2_err_matches(ret, EROFS)) { struct printbuf buf = PRINTBUF; prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); bch2_btree_pos_to_text(&buf, c, b); bch2_fs_fatal_error(c, "%s", buf.buf); printbuf_exit(&buf); } goto out; } static void btree_node_write_endio(struct bio *bio) { struct bch_write_bio *wbio = to_wbio(bio); struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL; struct bch_write_bio *orig = parent ?: wbio; struct btree_write_bio *wb = container_of(orig, struct btree_write_bio, wbio); struct bch_fs *c = wbio->c; struct btree *b = wbio->bio.bi_private; struct bch_dev *ca = wbio->have_ioref ? 
bch2_dev_have_ref(c, wbio->dev) : NULL; bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, wbio->submit_time, !bio->bi_status); if (ca && bio->bi_status) { struct printbuf buf = PRINTBUF; buf.atomic++; prt_printf(&buf, "btree write error: %s\n ", bch2_blk_status_to_str(bio->bi_status)); bch2_btree_pos_to_text(&buf, c, b); bch_err_dev_ratelimited(ca, "%s", buf.buf); printbuf_exit(&buf); } if (bio->bi_status) { unsigned long flags; spin_lock_irqsave(&c->btree_write_error_lock, flags); bch2_dev_list_add_dev(&orig->failed, wbio->dev); spin_unlock_irqrestore(&c->btree_write_error_lock, flags); } /* * XXX: we should be using io_ref[WRITE], but we aren't retrying failed * btree writes yet (due to device removal/ro): */ if (wbio->have_ioref) enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_write); if (parent) { bio_put(bio); bio_endio(&parent->bio); return; } clear_btree_node_write_in_flight_inner(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_write_complete_wq, &wb->work); } static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i) { int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, .level = b->c.level + 1, .btree = b->c.btree_id, .flags = BCH_VALIDATE_write, }); if (ret) { bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; } ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: validate_bset(c, NULL, b, i, b->written, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); } return ret; } static void btree_write_submit(struct work_struct *work) { struct btree_write_bio *wbio = container_of(work, struct btree_write_bio, work); BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; bkey_copy(&tmp.k, &wbio->key); bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&tmp.k)), ptr) ptr->offset += wbio->sector_offset; bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k, false); } void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) { struct btree_write_bio *wbio; struct bset *i; struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; struct sort_iter_stack sort_iter; struct nonce nonce; unsigned bytes_to_write, sectors_to_write, bytes, u64s; u64 seq = 0; bool used_mempool; unsigned long old, new; bool validate_before_checksum = false; enum btree_write_type type = flags & BTREE_WRITE_TYPE_MASK; void *data; u64 start_time = local_clock(); int ret; if (flags & BTREE_WRITE_ALREADY_STARTED) goto do_write; /* * We may only have a read lock on the btree node - the dirty bit is our * "lock" against racing with other threads that may be trying to start * a write, we do a write iff we clear the dirty bit. 
Since setting the * dirty bit requires a write lock, we can't race with other threads * redirtying it: */ old = READ_ONCE(b->flags); do { new = old; if (!(old & (1 << BTREE_NODE_dirty))) return; if ((flags & BTREE_WRITE_ONLY_IF_NEED) && !(old & (1 << BTREE_NODE_need_write))) return; if (old & ((1 << BTREE_NODE_never_write)| (1 << BTREE_NODE_write_blocked))) return; if (b->written && (old & (1 << BTREE_NODE_will_make_reachable))) return; if (old & (1 << BTREE_NODE_write_in_flight)) return; if (flags & BTREE_WRITE_ONLY_IF_NEED) type = new & BTREE_WRITE_TYPE_MASK; new &= ~BTREE_WRITE_TYPE_MASK; new &= ~(1 << BTREE_NODE_dirty); new &= ~(1 << BTREE_NODE_need_write); new |= (1 << BTREE_NODE_write_in_flight); new |= (1 << BTREE_NODE_write_in_flight_inner); new |= (1 << BTREE_NODE_just_written); new ^= (1 << BTREE_NODE_write_idx); } while (!try_cmpxchg_acquire(&b->flags, &old, new)); if (new & (1U << BTREE_NODE_need_write)) return; do_write: BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0)); atomic_long_dec(&c->btree_cache.nr_dirty); BUG_ON(btree_node_fake(b)); BUG_ON((b->will_make_reachable != 0) != !b->written); BUG_ON(b->written >= btree_sectors(c)); BUG_ON(b->written & (block_sectors(c) - 1)); BUG_ON(bset_written(b, btree_bset_last(b))); BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); bch2_sort_whiteouts(c, b); sort_iter_stack_init(&sort_iter, b); bytes = !b->written ? sizeof(struct btree_node) : sizeof(struct btree_node_entry); bytes += b->whiteout_u64s * sizeof(u64); for_each_bset(b, t) { i = bset(b, t); if (bset_written(b, i)) continue; bytes += le16_to_cpu(i->u64s) * sizeof(u64); sort_iter_add(&sort_iter.iter, btree_bkey_first(b, t), btree_bkey_last(b, t)); seq = max(seq, le64_to_cpu(i->journal_seq)); } BUG_ON(b->written && !seq); /* bch2_varint_decode may read up to 7 bytes past the end of the buffer: */ bytes += 8; /* buffer must be a multiple of the block size */ bytes = round_up(bytes, block_bytes(c)); data = btree_bounce_alloc(c, bytes, &used_mempool); if (!b->written) { bn = data; *bn = *b->data; i = &bn->keys; } else { bne = data; bne->keys = b->data->keys; i = &bne->keys; } i->journal_seq = cpu_to_le64(seq); i->u64s = 0; sort_iter_add(&sort_iter.iter, unwritten_whiteouts_start(b), unwritten_whiteouts_end(b)); SET_BSET_SEPARATE_WHITEOUTS(i, false); u64s = bch2_sort_keys_keep_unwritten_whiteouts(i->start, &sort_iter.iter); le16_add_cpu(&i->u64s, u64s); b->whiteout_u64s = 0; BUG_ON(!b->written && i->u64s != b->data->keys.u64s); set_needs_whiteout(i, false); /* do we have data to write? 
*/ if (b->written && !i->u64s) goto nowrite; bytes_to_write = vstruct_end(i) - data; sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; if (!b->written && b->key.k.type == KEY_TYPE_btree_ptr_v2) BUG_ON(btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) != sectors_to_write); memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); BUG_ON(b->written + sectors_to_write > btree_sectors(c)); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); i->version = cpu_to_le16(c->sb.version); SET_BSET_OFFSET(i, b->written); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) validate_before_checksum = true; /* validate_bset will be modifying: */ if (le16_to_cpu(i->version) < bcachefs_metadata_version_current) validate_before_checksum = true; /* if we're going to be encrypting, check metadata validity first: */ if (validate_before_checksum && validate_bset_for_write(c, b, i)) goto err; ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, "encrypting btree node: %s", bch2_err_str(ret))) goto err; nonce = btree_nonce(i, b->written << 9); if (bn) bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); else bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); /* if we're not encrypting, check metadata after checksumming: */ if (!validate_before_checksum && validate_bset_for_write(c, b, i)) goto err; /* * We handle btree write errors by immediately halting the journal - * after we've done that, we can't issue any subsequent btree writes * because they might have pointers to new nodes that failed to write. * * Furthermore, there's no point in doing any more btree writes because * with the journal stopped, we're never going to update the journal to * reflect that those writes were done and the data flushed from the * journal: * * Also on journal error, the pending write may have updates that were * never journalled (interior nodes, see btree_update_nodes_written()) - * it's critical that we don't do the write in that case otherwise we * will have updates visible that weren't in the journal: * * Make sure to update b->written so bch2_btree_init_next() doesn't * break: */ if (bch2_journal_error(&c->journal) || c->opts.nochanges) goto err; trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write); wbio = container_of(bio_alloc_bioset(NULL, buf_pages(data, sectors_to_write << 9), REQ_OP_WRITE|REQ_META, GFP_NOFS, &c->btree_bio), struct btree_write_bio, wbio.bio); wbio_init(&wbio->wbio.bio); wbio->data = data; wbio->data_bytes = bytes; wbio->sector_offset = b->written; wbio->start_time = start_time; wbio->wbio.c = c; wbio->wbio.used_mempool = used_mempool; wbio->wbio.first_btree_write = !b->written; wbio->wbio.bio.bi_end_io = btree_node_write_endio; wbio->wbio.bio.bi_private = b; bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); bkey_copy(&wbio->key, &b->key); b->written += sectors_to_write; if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2) bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written = cpu_to_le16(b->written); atomic64_inc(&c->btree_write_stats[type].nr); atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); async_object_list_add(c, btree_write_bio, wbio, &wbio->list_idx); INIT_WORK(&wbio->work, btree_write_submit); queue_work(c->btree_write_submit_wq, &wbio->work); return; err: set_btree_node_noevict(b); b->written += sectors_to_write; nowrite: btree_bounce_free(c, bytes, used_mempool, data); __btree_node_write_done(c, b, 0); } 
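/*
 * Aside - an illustrative sketch, not bcachefs code: the lockless
 * "claim via flag transition" pattern used by __bch2_btree_node_write()
 * above, reduced to its core. Whoever atomically clears the dirty bit
 * while setting the write_in_flight bit owns the write; everyone else
 * observes the transition and backs off. The toy_* names are invented.
 */
enum { TOY_dirty, TOY_write_in_flight };

struct toy_node { unsigned long flags; };

static bool __maybe_unused toy_claim_write(struct toy_node *n)
{
	unsigned long old = READ_ONCE(n->flags), new;

	do {
		if (!(old & (1UL << TOY_dirty)))	/* nothing to write */
			return false;
		if (old & (1UL << TOY_write_in_flight))	/* lost the race */
			return false;

		new = old;
		new &= ~(1UL << TOY_dirty);
		new |= 1UL << TOY_write_in_flight;
	} while (!try_cmpxchg_acquire(&n->flags, &old, new));

	return true;	/* we cleared the dirty bit, so the write is ours */
}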
/* * Work that must be done with write lock held: */ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) { bool invalidated_iter = false; struct btree_node_entry *bne; if (!btree_node_just_written(b)) return false; BUG_ON(b->whiteout_u64s); clear_btree_node_just_written(b); /* * Note: immediately after write, bset_written() doesn't work - the * amount of data we had to write after compaction might have been * smaller than the offset of the last bset. * * However, we know that all bsets have been written here, as long as * we're still holding the write lock: */ /* * XXX: decide if we really want to unconditionally sort down to a * single bset: */ if (b->nsets > 1) { btree_node_sort(c, b, 0, b->nsets); invalidated_iter = true; } else { invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); } for_each_bset(b, t) set_needs_whiteout(bset(b, t), true); bch2_btree_verify(c, b); /* * If later we don't unconditionally sort down to a single bset, we have * to ensure this is still true: */ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); bne = want_new_bset(c, b); if (bne) bch2_bset_init_next(b, bne); bch2_btree_build_aux_trees(b); return invalidated_iter; } /* * Use this one if the node is intent locked: */ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, enum six_lock_type lock_type_held, unsigned flags) { if (lock_type_held == SIX_LOCK_intent || (lock_type_held == SIX_LOCK_read && six_lock_tryupgrade(&b->c.lock))) { __bch2_btree_node_write(c, b, flags); /* don't cycle lock unnecessarily: */ if (btree_node_just_written(b) && six_trylock_write(&b->c.lock)) { bch2_btree_post_write_cleanup(c, b); six_unlock_write(&b->c.lock); } if (lock_type_held == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } else { __bch2_btree_node_write(c, b, flags); if (lock_type_held == SIX_LOCK_write && btree_node_just_written(b)) bch2_btree_post_write_cleanup(c, b); } } void bch2_btree_node_write_trans(struct btree_trans *trans, struct btree *b, enum six_lock_type lock_type_held, unsigned flags) { struct bch_fs *c = trans->c; if (lock_type_held == SIX_LOCK_intent || (lock_type_held == SIX_LOCK_read && six_lock_tryupgrade(&b->c.lock))) { __bch2_btree_node_write(c, b, flags); /* don't cycle lock unnecessarily: */ if (btree_node_just_written(b) && six_trylock_write(&b->c.lock)) { bch2_btree_post_write_cleanup(c, b); __bch2_btree_node_unlock_write(trans, b); } if (lock_type_held == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } else { __bch2_btree_node_write(c, b, flags); if (lock_type_held == SIX_LOCK_write && btree_node_just_written(b)) bch2_btree_post_write_cleanup(c, b); } } static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) { struct bucket_table *tbl; struct rhash_head *pos; struct btree *b; unsigned i; bool ret = false; restart: rcu_read_lock(); for_each_cached_btree(b, c, tbl, i, pos) if (test_bit(flag, &b->flags)) { rcu_read_unlock(); wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); ret = true; goto restart; } rcu_read_unlock(); return ret; } bool bch2_btree_flush_all_reads(struct bch_fs *c) { return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); } bool bch2_btree_flush_all_writes(struct bch_fs *c) { return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); } static const char * const bch2_btree_write_types[] = { #define x(t, n) [n] = #t, BCH_BTREE_WRITE_TYPES() NULL }; void bch2_btree_write_stats_to_text(struct printbuf *out, struct bch_fs *c) { printbuf_tabstop_push(out, 20); printbuf_tabstop_push(out, 10); prt_printf(out, 
"\tnr\tsize\n"); for (unsigned i = 0; i < BTREE_WRITE_TYPE_NR; i++) { u64 nr = atomic64_read(&c->btree_write_stats[i].nr); u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes); prt_printf(out, "%s:\t%llu\t", bch2_btree_write_types[i], nr); prt_human_readable_u64(out, nr ? div64_u64(bytes, nr) : 0); prt_newline(out); } }
/* SPDX-License-Identifier: GPL-2.0 */ /* * kobject.h - generic kernel object infrastructure. * * Copyright (c) 2002-2003 Patrick Mochel * Copyright (c) 2002-2003 Open Source Development Labs * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2008 Novell Inc. * * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ #ifndef _KOBJECT_H_ #define _KOBJECT_H_ #include <linux/types.h> #include <linux/list.h> #include <linux/sysfs.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/spinlock.h> #include <linux/kref.h> #include <linux/kobject_ns.h> #include <linux/wait.h> #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/uidgid.h> #define UEVENT_HELPER_PATH_LEN 256 #define UEVENT_NUM_ENVP 64 /* number of env pointers */ #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ #ifdef CONFIG_UEVENT_HELPER /* path to the userspace helper executed on an event */ extern char uevent_helper[]; #endif /* counter to tag the uevent, read only except for the kobject core */ extern atomic64_t uevent_seqnum; /* * The actions here must match the index to the string array * in lib/kobject_uevent.c * * Do not add new actions here without checking with the driver-core * maintainers. Action strings are not meant to express subsystem * or device specific properties. In most cases you want to send a * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event * specific variables added to the event environment.
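* * For example (illustrative only): char *envp[] = { "RESIZE=1", NULL }; kobject_uevent_env(kobj, KOBJ_CHANGE, envp);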
*/ enum kobject_action { KOBJ_ADD, KOBJ_REMOVE, KOBJ_CHANGE, KOBJ_MOVE, KOBJ_ONLINE, KOBJ_OFFLINE, KOBJ_BIND, KOBJ_UNBIND, }; struct kobject { const char *name; struct list_head entry; struct kobject *parent; struct kset *kset; const struct kobj_type *ktype; struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; unsigned int state_initialized:1; unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; unsigned int uevent_suppress:1; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE struct delayed_work release; #endif }; __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); __printf(2, 0) int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs); static inline const char *kobject_name(const struct kobject *kobj) { return kobj->name; } void kobject_init(struct kobject *kobj, const struct kobj_type *ktype); __printf(3, 4) __must_check int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...); __printf(4, 5) __must_check int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...); void kobject_del(struct kobject *kobj); struct kobject * __must_check kobject_create_and_add(const char *name, struct kobject *parent); int __must_check kobject_rename(struct kobject *, const char *new_name); int __must_check kobject_move(struct kobject *, struct kobject *); struct kobject *kobject_get(struct kobject *kobj); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj); void kobject_put(struct kobject *kobj); const void *kobject_namespace(const struct kobject *kobj); void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); char *kobject_get_path(const struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj); const void *(*namespace)(const struct kobject *kobj); void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { char *argv[3]; char *envp[UEVENT_NUM_ENVP]; int envp_idx; char buf[UEVENT_BUFFER_SIZE]; int buflen; }; struct kset_uevent_ops { int (* const filter)(const struct kobject *kobj); const char *(* const name)(const struct kobject *kobj); int (* const uevent)(const struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count); }; extern const struct sysfs_ops kobj_sysfs_ops; struct sock; /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. * * A kset defines a group of kobjects. They can be individually * different "types" but overall these kobjects all want to be grouped * together and operated on in the same manner. ksets are used to * define the attribute callbacks and other common events that happen to * a kobject. * * @list: the list of all kobjects for this kset * @list_lock: a lock for iterating over the kobjects * @kobj: the embedded kobject for this kset (recursion, isn't it fun...) * @uevent_ops: the set of uevent operations for this kset. 
These are * called whenever a kobject has something happen to it so that the kset * can add new environment variables, or filter out the uevents if so * desired. */ struct kset { struct list_head list; spinlock_t list_lock; struct kobject kobj; const struct kset_uevent_ops *uevent_ops; } __randomize_layout; void kset_init(struct kset *kset); int __must_check kset_register(struct kset *kset); void kset_unregister(struct kset *kset); struct kset * __must_check kset_create_and_add(const char *name, const struct kset_uevent_ops *u, struct kobject *parent_kobj); static inline struct kset *to_kset(struct kobject *kobj) { return kobj ? container_of(kobj, struct kset, kobj) : NULL; } static inline struct kset *kset_get(struct kset *k) { return k ? to_kset(kobject_get(&k->kobj)) : NULL; } static inline void kset_put(struct kset *k) { kobject_put(&k->kobj); } static inline const struct kobj_type *get_ktype(const struct kobject *kobj) { return kobj->ktype; } struct kobject *kset_find_obj(struct kset *, const char *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; /* The global /sys/kernel/mm/ kobject for people to chain off of */ extern struct kobject *mm_kobj; /* The global /sys/hypervisor/ kobject for people to chain off of */ extern struct kobject *hypervisor_kobj; /* The global /sys/power/ kobject for people to chain off of */ extern struct kobject *power_kobj; /* The global /sys/firmware/ kobject for people to chain off of */ extern struct kobject *firmware_kobj; int kobject_uevent(struct kobject *kobj, enum kobject_action action); int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp[]); int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count); __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); #endif /* _KOBJECT_H_ */
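/*
 * Minimal usage sketch of the API declared above (illustrative only; the
 * "foo" names are hypothetical, not part of this header). It shows the
 * canonical embedding pattern: a kobject inside a driver structure, a
 * kobj_type whose release() frees the containing object, and the rule that
 * once kobject_init_and_add() has been called the object may only be freed
 * through kobject_put(), even on the error path.
 */
#include <linux/kobject.h>
#include <linux/slab.h>

struct foo {
	struct kobject kobj;
	int val;
};

static void foo_release(struct kobject *kobj)
{
	/* last reference dropped: now it is safe to free the container */
	kfree(container_of(kobj, struct foo, kobj));
}

static const struct kobj_type foo_ktype = {
	.release	= foo_release,
	.sysfs_ops	= &kobj_sysfs_ops,
};

static struct foo *foo_create(struct kobject *parent)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	if (kobject_init_and_add(&f->kobj, &foo_ktype, parent, "foo")) {
		/* init succeeded, so we owe a put; release() frees f */
		kobject_put(&f->kobj);
		return NULL;
	}
	return f;
}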
/* * Ext4 orphan inode handling */ #include <linux/fs.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> #include "ext4.h" #include "ext4_jbd2.h" static int ext4_orphan_file_add(handle_t *handle, struct inode *inode) { int i, j, start; struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info; int ret = 0; bool found = false; __le32 *bdata; int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb); int looped = 0; /* * Find block with free orphan entry. Use CPU number for a naive hash * for a search start in the orphan file */ start = raw_smp_processor_id()*13 % oi->of_blocks; i = start; do { if (atomic_dec_if_positive(&oi->of_binfo[i].ob_free_entries) >= 0) { found = true; break; } if (++i >= oi->of_blocks) i = 0; } while (i != start); if (!found) { /* * For now we don't grow or shrink orphan file.
We just use * whatever was allocated at mke2fs time. The additional * credits we would have to reserve for each orphan inode * operation just don't seem worth it. */ return -ENOSPC; } ret = ext4_journal_get_write_access(handle, inode->i_sb, oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE); if (ret) { atomic_inc(&oi->of_binfo[i].ob_free_entries); return ret; } bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data); /* Find empty slot in a block */ j = 0; do { if (looped) { /* * Did we walk through the block several times without * finding free entry? It is theoretically possible * if entries get constantly allocated and freed or * if the block is corrupted. Avoid indefinite looping * and bail. We'll use orphan list instead. */ if (looped > 3) { atomic_inc(&oi->of_binfo[i].ob_free_entries); return -ENOSPC; } cond_resched(); } while (bdata[j]) { if (++j >= inodes_per_ob) { j = 0; looped++; } } } while (cmpxchg(&bdata[j], (__le32)0, cpu_to_le32(inode->i_ino)) != (__le32)0); EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j; ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE); return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh); } /* * ext4_orphan_add() links an unlinked or truncated inode into a list of * such inodes, starting at the superblock, in case we crash before the * file is closed/deleted, or in case the inode truncate spans multiple * transactions and the last transaction is not recovered after a crash. * * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). * * Orphan list manipulation functions must be called under i_rwsem unless * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_iloc iloc; int err = 0, rc; bool dirty = false; if (!sbi->s_journal || is_bad_inode(inode)) return 0; WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && !inode_is_locked(inode)); /* * Inode orphaned in orphan file or in orphan list? */ if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) || !list_empty(&EXT4_I(inode)->i_orphan)) return 0; /* * Orphan handling is only valid for files with data blocks * being truncated, or files being unlinked. Note that we either * hold i_rwsem, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); if (sbi->s_orphan_info.of_blocks) { err = ext4_orphan_file_add(handle, inode); /* * Fallback to normal orphan list of orphan file is * out of space */ if (err != -ENOSPC) return err; } BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, EXT4_JTR_NONE); if (err) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out; mutex_lock(&sbi->s_orphan_lock); /* * Due to previous errors inode may be already a part of on-disk * orphan list. If so skip on-disk list modification. 
*/ if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > (le32_to_cpu(sbi->s_es->s_inodes_count))) { /* Insert this inode at the head of the on-disk orphan list */ NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); dirty = true; } list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); mutex_unlock(&sbi->s_orphan_lock); if (dirty) { err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; if (err) { /* * We have to remove inode from in-memory list if * addition to on disk orphan list failed. Stray orphan * list entries can cause panics at unmount time. */ mutex_lock(&sbi->s_orphan_lock); list_del_init(&EXT4_I(inode)->i_orphan); mutex_unlock(&sbi->s_orphan_lock); } } else brelse(iloc.bh); ext4_debug("superblock will point to %lu\n", inode->i_ino); ext4_debug("orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out: ext4_std_error(sb, err); return err; } static int ext4_orphan_file_del(handle_t *handle, struct inode *inode) { struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info; __le32 *bdata; int blk, off; int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb); int ret = 0; if (!handle) goto out; blk = EXT4_I(inode)->i_orphan_idx / inodes_per_ob; off = EXT4_I(inode)->i_orphan_idx % inodes_per_ob; if (WARN_ON_ONCE(blk >= oi->of_blocks)) goto out; ret = ext4_journal_get_write_access(handle, inode->i_sb, oi->of_binfo[blk].ob_bh, EXT4_JTR_ORPHAN_FILE); if (ret) goto out; bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data); bdata[off] = 0; atomic_inc(&oi->of_binfo[blk].ob_free_entries); ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh); out: ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE); INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan); return ret; } /* * ext4_orphan_del() removes an unlinked or truncated inode from the list * of such inodes stored on disk, because it is finally being cleaned up. */ int ext4_orphan_del(handle_t *handle, struct inode *inode) { struct list_head *prev; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 ino_next; struct ext4_iloc iloc; int err = 0; if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) return 0; WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && !inode_is_locked(inode)); if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE)) return ext4_orphan_file_del(handle, inode); /* Do this quick check before taking global s_orphan_lock. */ if (list_empty(&ei->i_orphan)) return 0; if (handle) { /* Grab inode buffer early before taking global s_orphan_lock */ err = ext4_reserve_inode_write(handle, inode, &iloc); } mutex_lock(&sbi->s_orphan_lock); ext4_debug("remove inode %lu from orphan list\n", inode->i_ino); prev = ei->i_orphan.prev; list_del_init(&ei->i_orphan); /* If we're on an error path, we may not have a valid * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. 
*/ if (!handle || err) { mutex_unlock(&sbi->s_orphan_lock); goto out_err; } ino_next = NEXT_ORPHAN(inode); if (prev == &sbi->s_orphan) { ext4_debug("superblock will point to %u\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode->i_sb, sbi->s_sbh, EXT4_JTR_NONE); if (err) { mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); ext4_superblock_csum_set(inode->i_sb); unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; ext4_debug("orphan inode %lu will point to %u\n", i_prev->i_ino, ino_next); err = ext4_reserve_inode_write(handle, i_prev, &iloc2); if (err) { mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } NEXT_ORPHAN(i_prev) = ino_next; err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); mutex_unlock(&sbi->s_orphan_lock); } if (err) goto out_brelse; NEXT_ORPHAN(inode) = 0; err = ext4_mark_iloc_dirty(handle, inode, &iloc); out_err: ext4_std_error(inode->i_sb, err); return err; out_brelse: brelse(iloc.bh); goto out_err; } #ifdef CONFIG_QUOTA static int ext4_quota_on_mount(struct super_block *sb, int type) { return dquot_quota_on_mount(sb, rcu_dereference_protected(EXT4_SB(sb)->s_qf_names[type], lockdep_is_held(&sb->s_umount)), EXT4_SB(sb)->s_jquota_fmt, type); } #endif static void ext4_process_orphan(struct inode *inode, int *nr_truncates, int *nr_orphans) { struct super_block *sb = inode->i_sb; int ret; dquot_initialize(inode); if (inode->i_nlink) { if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "%s: truncating inode %lu to %lld bytes", __func__, inode->i_ino, inode->i_size); ext4_debug("truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); inode_lock(inode); truncate_inode_pages(inode->i_mapping, inode->i_size); ret = ext4_truncate(inode); if (ret) { /* * We need to clean up the in-core orphan list * manually if ext4_truncate() failed to get a * transaction handle. */ ext4_orphan_del(NULL, inode); ext4_std_error(inode->i_sb, ret); } inode_unlock(inode); (*nr_truncates)++; } else { if (test_opt(sb, DEBUG)) ext4_msg(sb, KERN_DEBUG, "%s: deleting unreferenced inode %lu", __func__, inode->i_ino); ext4_debug("deleting unreferenced inode %lu\n", inode->i_ino); (*nr_orphans)++; } iput(inode); /* The delete magic happens here! */ } /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at * the superblock) which were deleted from all directories, but held open by * a process at the time of a crash. We walk the list and try to delete these * inodes at recovery time (only with a read-write filesystem). * * In order to keep the orphan inode chain consistent during traversal (in * case of crash during recovery), we link each inode into the superblock * orphan list_head and handle it the same way as an inode deletion during * normal operation (which journals the operations for us). * * We only do an iget() and an iput() on each inode, which is very safe if we * accidentally point at an in-use or already deleted inode. The worst that * can happen in this case is that we get a "bit already cleared" message from * ext4_free_inode(). The only reason we would point at a wrong inode is if * e2fsck was run on this filesystem, and it must have already done the orphan * inode cleanup for us, so we can safely abort without any further action. 
*/ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es) { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; struct inode *inode; int i, j; #ifdef CONFIG_QUOTA int quota_update = 0; #endif __le32 *bdata; struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; int inodes_per_ob = ext4_inodes_per_orphan_block(sb); if (!es->s_last_orphan && !oi->of_blocks) { ext4_debug("no orphan inodes to clean up\n"); return; } if (bdev_read_only(sb->s_bdev)) { ext4_msg(sb, KERN_ERR, "write access " "unavailable, skipping orphan cleanup"); return; } /* Check if feature set would not allow a r/w mount */ if (!ext4_feature_set_ok(sb, 0)) { ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " "unknown ROCOMPAT features"); return; } if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ if (es->s_last_orphan && !(s_flags & SB_RDONLY)) { ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list."); es->s_last_orphan = 0; } ext4_debug("Skipping orphan recovery on fs with errors.\n"); return; } if (s_flags & SB_RDONLY) { ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); sb->s_flags &= ~SB_RDONLY; } #ifdef CONFIG_QUOTA /* * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) { int ret = ext4_enable_quotas(sb); if (!ret) quota_update = 1; else ext4_msg(sb, KERN_ERR, "Cannot turn on quotas: error %d", ret); } /* Turn on journaled quotas used for old style */ for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (!ret) quota_update = 1; else ext4_msg(sb, KERN_ERR, "Cannot turn on journaled " "quota: type %d: error %d", i, ret); } } #endif while (es->s_last_orphan) { /* * We may have encountered an error during cleanup; if * so, skip the rest. */ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { ext4_debug("Skipping orphan recovery on fs with errors.\n"); es->s_last_orphan = 0; break; } inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); if (IS_ERR(inode)) { es->s_last_orphan = 0; break; } list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); ext4_process_orphan(inode, &nr_truncates, &nr_orphans); } for (i = 0; i < oi->of_blocks; i++) { bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data); for (j = 0; j < inodes_per_ob; j++) { if (!bdata[j]) continue; inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j])); if (IS_ERR(inode)) continue; ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE); EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j; ext4_process_orphan(inode, &nr_truncates, &nr_orphans); } } #define PLURAL(x) (x), ((x) == 1) ?
"" : "s" if (nr_orphans) ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", PLURAL(nr_orphans)); if (nr_truncates) ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn off quotas if they were enabled for orphan cleanup */ if (quota_update) { for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) dquot_quota_off(sb, i); } } #endif sb->s_flags = s_flags; /* Restore SB_RDONLY status */ } void ext4_release_orphan_info(struct super_block *sb) { int i; struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; if (!oi->of_blocks) return; for (i = 0; i < oi->of_blocks; i++) brelse(oi->of_binfo[i].ob_bh); kfree(oi->of_binfo); } static struct ext4_orphan_block_tail *ext4_orphan_block_tail( struct super_block *sb, struct buffer_head *bh) { return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize - sizeof(struct ext4_orphan_block_tail)); } static int ext4_orphan_file_block_csum_verify(struct super_block *sb, struct buffer_head *bh) { __u32 calculated; int inodes_per_ob = ext4_inodes_per_orphan_block(sb); struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; struct ext4_orphan_block_tail *ot; __le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr); if (!ext4_has_feature_metadata_csum(sb)) return 1; ot = ext4_orphan_block_tail(sb, bh); calculated = ext4_chksum(oi->of_csum_seed, (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr)); calculated = ext4_chksum(calculated, (__u8 *)bh->b_data, inodes_per_ob * sizeof(__u32)); return le32_to_cpu(ot->ob_checksum) == calculated; } /* This gets called only when checksumming is enabled */ void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { struct super_block *sb = EXT4_TRIGGER(triggers)->sb; __u32 csum; int inodes_per_ob = ext4_inodes_per_orphan_block(sb); struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; struct ext4_orphan_block_tail *ot; __le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr); csum = ext4_chksum(oi->of_csum_seed, (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr)); csum = ext4_chksum(csum, (__u8 *)data, inodes_per_ob * sizeof(__u32)); ot = ext4_orphan_block_tail(sb, bh); ot->ob_checksum = cpu_to_le32(csum); } int ext4_init_orphan_info(struct super_block *sb) { struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; struct inode *inode; int i, j; int ret; int free; __le32 *bdata; int inodes_per_ob = ext4_inodes_per_orphan_block(sb); struct ext4_orphan_block_tail *ot; ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum); if (!ext4_has_feature_orphan_file(sb)) return 0; inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL); if (IS_ERR(inode)) { ext4_msg(sb, KERN_ERR, "get orphan inode failed"); return PTR_ERR(inode); } oi->of_blocks = inode->i_size >> sb->s_blocksize_bits; oi->of_csum_seed = EXT4_I(inode)->i_csum_seed; oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block), GFP_KERNEL); if (!oi->of_binfo) { ret = -ENOMEM; goto out_put; } for (i = 0; i < oi->of_blocks; i++) { oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0); if (IS_ERR(oi->of_binfo[i].ob_bh)) { ret = PTR_ERR(oi->of_binfo[i].ob_bh); goto out_free; } if (!oi->of_binfo[i].ob_bh) { ret = -EIO; goto out_free; } ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh); if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) { ext4_error(sb, "orphan file block %d: bad magic", i); ret = -EIO; goto out_free; } if (!ext4_orphan_file_block_csum_verify(sb, oi->of_binfo[i].ob_bh)) { ext4_error(sb, "orphan file block %d: 
bad checksum", i); ret = -EIO; goto out_free; } bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data); free = 0; for (j = 0; j < inodes_per_ob; j++) if (bdata[j] == 0) free++; atomic_set(&oi->of_binfo[i].ob_free_entries, free); } iput(inode); return 0; out_free: for (i--; i >= 0; i--) brelse(oi->of_binfo[i].ob_bh); kfree(oi->of_binfo); out_put: iput(inode); return ret; } int ext4_orphan_file_empty(struct super_block *sb) { struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info; int i; int inodes_per_ob = ext4_inodes_per_orphan_block(sb); if (!ext4_has_feature_orphan_file(sb)) return 1; for (i = 0; i < oi->of_blocks; i++) if (atomic_read(&oi->of_binfo[i].ob_free_entries) != inodes_per_ob) return 0; return 1; }
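/*
 * Sketch of the on-disk orphan file block layout implied by the code above:
 * each block is an array of __le32 inode numbers, with a
 * struct ext4_orphan_block_tail (magic + checksum) at the very end, which is
 * what ext4_orphan_block_tail() locates. The helper below mirrors the entry
 * count that ext4_inodes_per_orphan_block() must return for the index
 * arithmetic in ext4_orphan_file_add()/_del() to line up; it is an
 * illustration, not the authoritative definition from ext4.h.
 */
static inline int example_inodes_per_orphan_block(struct super_block *sb)
{
	return (sb->s_blocksize - sizeof(struct ext4_orphan_block_tail)) /
		sizeof(__le32);
}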
// SPDX-License-Identifier: GPL-2.0 #ifndef NO_BCACHEFS_CHARDEV #include "bcachefs.h" #include "bcachefs_ioctl.h" #include "buckets.h" #include "chardev.h"
#include "disk_accounting.h" #include "fsck.h" #include "journal.h" #include "move.h" #include "recovery_passes.h" #include "replicas.h" #include "sb-counters.h" #include "super-io.h" #include "thread_with_file.h" #include <linux/cdev.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/ioctl.h> #include <linux/major.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/uaccess.h> /* returns with ref on ca->ref */ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, unsigned flags) { struct bch_dev *ca; if (flags & BCH_BY_INDEX) { if (dev >= c->sb.nr_devices) return ERR_PTR(-EINVAL); ca = bch2_dev_tryget_noerror(c, dev); if (!ca) return ERR_PTR(-EINVAL); } else { char *path; path = strndup_user((const char __user *) (unsigned long) dev, PATH_MAX); if (IS_ERR(path)) return ERR_CAST(path); ca = bch2_dev_lookup(c, path); kfree(path); } return ca; } #if 0 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) { struct bch_ioctl_assemble arg; struct bch_fs *c; u64 *user_devs = NULL; char **devs = NULL; unsigned i; int ret = -EFAULT; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; if (arg.flags || arg.pad) return -EINVAL; user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL); if (!user_devs) return -ENOMEM; devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL); if (!devs) { ret = -ENOMEM; goto err; } if (copy_from_user(user_devs, user_arg->devs, sizeof(u64) * arg.nr_devs)) goto err; for (i = 0; i < arg.nr_devs; i++) { devs[i] = strndup_user((const char __user *)(unsigned long) user_devs[i], PATH_MAX); ret = PTR_ERR_OR_ZERO(devs[i]); if (ret) goto err; } c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty()); ret = PTR_ERR_OR_ZERO(c); if (!ret) closure_put(&c->cl); err: if (devs) for (i = 0; i < arg.nr_devs; i++) kfree(devs[i]); kfree(devs); return ret; } static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg) { struct bch_ioctl_incremental arg; const char *err; char *path; int ret; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; if (arg.flags || arg.pad) return -EINVAL; path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); ret = PTR_ERR_OR_ZERO(path); if (ret) return ret; err = bch2_fs_open_incremental(path); kfree(path); if (err) { pr_err("Could not register bcachefs devices: %s", err); return -EINVAL; } return 0; } #endif static long bch2_global_ioctl(unsigned cmd, void __user *arg) { long ret; switch (cmd) { #if 0 case BCH_IOCTL_ASSEMBLE: return bch2_ioctl_assemble(arg); case BCH_IOCTL_INCREMENTAL: return bch2_ioctl_incremental(arg); #endif case BCH_IOCTL_FSCK_OFFLINE: { ret = bch2_ioctl_fsck_offline(arg); break; } default: ret = -ENOTTY; break; } if (ret < 0) ret = bch2_err_class(ret); return ret; } static long bch2_ioctl_query_uuid(struct bch_fs *c, struct bch_ioctl_query_uuid __user *user_arg) { return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid, sizeof(c->sb.user_uuid)); } #if 0 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; return bch2_fs_start(c); } static long bch2_ioctl_stop(struct bch_fs *c) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; bch2_fs_stop(c); return 0; } #endif static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) { char *path; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); ret =
PTR_ERR_OR_ZERO(path); if (ret) return ret; ret = bch2_dev_add(c, path); if (!IS_ERR(path)) kfree(path); return ret; } static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) { struct bch_dev *ca; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| BCH_FORCE_IF_METADATA_LOST| BCH_FORCE_IF_DEGRADED| BCH_BY_INDEX)) || arg.pad) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); return bch2_dev_remove(c, ca, arg.flags); } static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) { char *path; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (arg.flags || arg.pad) return -EINVAL; path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX); ret = PTR_ERR_OR_ZERO(path); if (ret) return ret; ret = bch2_dev_online(c, path); kfree(path); return ret; } static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) { struct bch_dev *ca; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| BCH_FORCE_IF_METADATA_LOST| BCH_FORCE_IF_DEGRADED| BCH_BY_INDEX)) || arg.pad) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); ret = bch2_dev_offline(c, ca, arg.flags); bch2_dev_put(ca); return ret; } static long bch2_ioctl_disk_set_state(struct bch_fs *c, struct bch_ioctl_disk_set_state arg) { struct bch_dev *ca; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| BCH_FORCE_IF_METADATA_LOST| BCH_FORCE_IF_DEGRADED| BCH_BY_INDEX)) || arg.pad[0] || arg.pad[1] || arg.pad[2] || arg.new_state >= BCH_MEMBER_STATE_NR) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); if (ret) bch_err(c, "Error setting device state: %s", bch2_err_str(ret)); bch2_dev_put(ca); return ret; } struct bch_data_ctx { struct thread_with_file thr; struct bch_fs *c; struct bch_ioctl_data arg; struct bch_move_stats stats; }; static int bch2_data_thread(void *arg) { struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); if (ctx->thr.ret == -BCH_ERR_device_offline) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline; else { ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done; ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done; } enumerated_ref_put(&ctx->c->writes, BCH_WRITE_REF_ioctl_data); return 0; } static int bch2_data_job_release(struct inode *inode, struct file *file) { struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); bch2_thread_with_file_exit(&ctx->thr); kfree(ctx); return 0; } static ssize_t bch2_data_job_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); struct bch_fs *c = ctx->c; struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .ret = ctx->stats.ret, .p.data_type = ctx->stats.data_type, .p.btree_id = ctx->stats.pos.btree, .p.pos = ctx->stats.pos.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected), .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected), }; if (ctx->arg.op == BCH_DATA_OP_scrub) { struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); if (ca) { struct 
bch_dev_usage_full u; bch2_dev_usage_full_read_fast(ca, &u); for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) if (ctx->arg.scrub.data_types & BIT(i)) e.p.sectors_total += u.d[i].sectors; bch2_dev_put(ca); } } else { e.p.sectors_total = bch2_fs_usage_read_short(c).used; } if (len < sizeof(e)) return -EINVAL; return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e); } static const struct file_operations bcachefs_data_ops = { .release = bch2_data_job_release, .read = bch2_data_job_read, }; static long bch2_ioctl_data(struct bch_fs *c, struct bch_ioctl_data arg) { struct bch_data_ctx *ctx; int ret; if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_ioctl_data)) return -EROFS; if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; goto put_ref; } if (arg.op >= BCH_DATA_OP_NR || arg.flags) { ret = -EINVAL; goto put_ref; } ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { ret = -ENOMEM; goto put_ref; } ctx->c = c; ctx->arg = arg; ret = bch2_run_thread_with_file(&ctx->thr, &bcachefs_data_ops, bch2_data_thread); if (ret < 0) goto cleanup; return ret; cleanup: kfree(ctx); put_ref: enumerated_ref_put(&c->writes, BCH_WRITE_REF_ioctl_data); return ret; } static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; darray_char replicas = {}; u32 replica_entries_bytes; int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) return -EFAULT; ret = bch2_fs_replicas_usage_read(c, &replicas) ?: (replica_entries_bytes < replicas.nr ? -ERANGE : 0) ?: copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr); if (ret) goto err; struct bch_fs_usage_short u = bch2_fs_usage_read_short(c); arg.capacity = c->capacity; arg.used = u.used; arg.online_reserved = percpu_u64_get(c->online_reserved); arg.replica_entries_bytes = replicas.nr; for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) { struct disk_accounting_pos k; disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i); bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&k), &arg.persistent_reserved[i], 1); } ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); err: darray_exit(&replicas); return ret; } static long bch2_ioctl_query_accounting(struct bch_fs *c, struct bch_ioctl_query_accounting __user *user_arg) { struct bch_ioctl_query_accounting arg; darray_char accounting = {}; int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?: (arg.accounting_u64s * sizeof(u64) < accounting.nr ? 
-ERANGE : 0) ?: copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr); if (ret) goto err; arg.capacity = c->capacity; arg.used = bch2_fs_usage_read_short(c).used; arg.online_reserved = percpu_u64_get(c->online_reserved); arg.accounting_u64s = accounting.nr / sizeof(u64); ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); err: darray_exit(&accounting); return ret; } /* obsolete, didn't allow for new data types: */ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; struct bch_dev_usage_full src; struct bch_dev *ca; unsigned i; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; if ((arg.flags & ~BCH_BY_INDEX) || arg.pad[0] || arg.pad[1] || arg.pad[2]) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; for (i = 0; i < ARRAY_SIZE(arg.d); i++) { arg.d[i].buckets = src.d[i].buckets; arg.d[i].sectors = src.d[i].sectors; arg.d[i].fragmented = src.d[i].fragmented; } bch2_dev_put(ca); return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_ioctl_dev_usage_v2 __user *user_arg) { struct bch_ioctl_dev_usage_v2 arg; struct bch_dev_usage_full src; struct bch_dev *ca; int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; if ((arg.flags & ~BCH_BY_INDEX) || arg.pad[0] || arg.pad[1] || arg.pad[2]) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR); arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); if (ret) goto err; for (unsigned i = 0; i < arg.nr_data_types; i++) { struct bch_ioctl_dev_usage_type t = { .buckets = src.d[i].buckets, .sectors = src.d[i].sectors, .fragmented = src.d[i].fragmented, }; ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t)); if (ret) goto err; } err: bch2_dev_put(ca); return ret; } static long bch2_ioctl_read_super(struct bch_fs *c, struct bch_ioctl_read_super arg) { struct bch_dev *ca = NULL; struct bch_sb *sb; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) || arg.pad) return -EINVAL; mutex_lock(&c->sb_lock); if (arg.flags & BCH_READ_DEV) { ca = bch2_device_lookup(c, arg.dev, arg.flags); ret = PTR_ERR_OR_ZERO(ca); if (ret) goto err_unlock; sb = ca->disk_sb.sb; } else { sb = c->disk_sb.sb; } if (vstruct_bytes(sb) > arg.size) { ret = -ERANGE; goto err; } ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb, vstruct_bytes(sb)); err: bch2_dev_put(ca); err_unlock: mutex_unlock(&c->sb_lock); return ret; } static long bch2_ioctl_disk_get_idx(struct bch_fs *c, struct bch_ioctl_disk_get_idx arg) { dev_t dev = huge_decode_dev(arg.dev); if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!dev) return -EINVAL; guard(rcu)(); for_each_online_member_rcu(c, ca) if (ca->dev == dev) return ca->dev_idx; return bch_err_throw(c, ENOENT_dev_idx_not_found); } static long bch2_ioctl_disk_resize(struct bch_fs *c, struct 
bch_ioctl_disk_resize arg) { struct bch_dev *ca; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~BCH_BY_INDEX) || arg.pad) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); ret = bch2_dev_resize(c, ca, arg.nbuckets); bch2_dev_put(ca); return ret; } static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, struct bch_ioctl_disk_resize_journal arg) { struct bch_dev *ca; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((arg.flags & ~BCH_BY_INDEX) || arg.pad) return -EINVAL; if (arg.nbuckets > U32_MAX) return -EINVAL; ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); bch2_dev_put(ca); return ret; } #define BCH_IOCTL(_name, _argtype) \ do { \ _argtype i; \ \ if (copy_from_user(&i, arg, sizeof(i))) \ return -EFAULT; \ ret = bch2_ioctl_##_name(c, i); \ goto out; \ } while (0) long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) { long ret; switch (cmd) { case BCH_IOCTL_QUERY_UUID: return bch2_ioctl_query_uuid(c, arg); case BCH_IOCTL_FS_USAGE: return bch2_ioctl_fs_usage(c, arg); case BCH_IOCTL_DEV_USAGE: return bch2_ioctl_dev_usage(c, arg); case BCH_IOCTL_DEV_USAGE_V2: return bch2_ioctl_dev_usage_v2(c, arg); #if 0 case BCH_IOCTL_START: BCH_IOCTL(start, struct bch_ioctl_start); case BCH_IOCTL_STOP: return bch2_ioctl_stop(c); #endif case BCH_IOCTL_READ_SUPER: BCH_IOCTL(read_super, struct bch_ioctl_read_super); case BCH_IOCTL_DISK_GET_IDX: BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx); } if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; switch (cmd) { case BCH_IOCTL_DISK_ADD: BCH_IOCTL(disk_add, struct bch_ioctl_disk); case BCH_IOCTL_DISK_REMOVE: BCH_IOCTL(disk_remove, struct bch_ioctl_disk); case BCH_IOCTL_DISK_ONLINE: BCH_IOCTL(disk_online, struct bch_ioctl_disk); case BCH_IOCTL_DISK_OFFLINE: BCH_IOCTL(disk_offline, struct bch_ioctl_disk); case BCH_IOCTL_DISK_SET_STATE: BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); case BCH_IOCTL_DATA: BCH_IOCTL(data, struct bch_ioctl_data); case BCH_IOCTL_DISK_RESIZE: BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize); case BCH_IOCTL_DISK_RESIZE_JOURNAL: BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal); case BCH_IOCTL_FSCK_ONLINE: BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online); case BCH_IOCTL_QUERY_ACCOUNTING: return bch2_ioctl_query_accounting(c, arg); case BCH_IOCTL_QUERY_COUNTERS: return bch2_ioctl_query_counters(c, arg); default: return -ENOTTY; } out: if (ret < 0) ret = bch2_err_class(ret); return ret; } static DEFINE_IDR(bch_chardev_minor); static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v) { unsigned minor = iminor(file_inode(filp)); struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL; void __user *arg = (void __user *) v; return c ? 
bch2_fs_ioctl(c, cmd, arg) : bch2_global_ioctl(cmd, arg); } static const struct file_operations bch_chardev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = bch2_chardev_ioctl, .open = nonseekable_open, }; static int bch_chardev_major; static const struct class bch_chardev_class = { .name = "bcachefs", }; static struct device *bch_chardev; void bch2_fs_chardev_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->chardev)) device_unregister(c->chardev); if (c->minor >= 0) idr_remove(&bch_chardev_minor, c->minor); } int bch2_fs_chardev_init(struct bch_fs *c) { c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL); if (c->minor < 0) return c->minor; c->chardev = device_create(&bch_chardev_class, NULL, MKDEV(bch_chardev_major, c->minor), c, "bcachefs%u-ctl", c->minor); if (IS_ERR(c->chardev)) return PTR_ERR(c->chardev); return 0; } void bch2_chardev_exit(void) { device_destroy(&bch_chardev_class, MKDEV(bch_chardev_major, U8_MAX)); class_unregister(&bch_chardev_class); if (bch_chardev_major > 0) unregister_chrdev(bch_chardev_major, "bcachefs-ctl"); } int __init bch2_chardev_init(void) { int ret; bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops); if (bch_chardev_major < 0) return bch_chardev_major; ret = class_register(&bch_chardev_class); if (ret) goto major_out; bch_chardev = device_create(&bch_chardev_class, NULL, MKDEV(bch_chardev_major, U8_MAX), NULL, "bcachefs-ctl"); if (IS_ERR(bch_chardev)) { ret = PTR_ERR(bch_chardev); goto class_out; } return 0; class_out: class_unregister(&bch_chardev_class); major_out: unregister_chrdev(bch_chardev_major, "bcachefs-ctl"); return ret; } #endif /* NO_BCACHEFS_CHARDEV */
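/*
 * Userspace sketch (illustrative, not part of this file): querying a
 * filesystem's UUID through the per-filesystem control node created above
 * ("/dev/bcachefs%u-ctl"). It assumes the bcachefs_ioctl.h UAPI definitions
 * are available to userspace; the include path and helper name are
 * hypothetical, and error handling is trimmed to the essentials.
 */
#if 0	/* userspace example, not built as kernel code */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "bcachefs_ioctl.h"	/* hypothetical location of the UAPI header */

static int query_uuid(const char *ctl_path)	/* e.g. "/dev/bcachefs0-ctl" */
{
	struct bch_ioctl_query_uuid u;
	int fd = open(ctl_path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (ioctl(fd, BCH_IOCTL_QUERY_UUID, &u) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	/* u.uuid now holds the filesystem's user-visible UUID */
	return 0;
}
#endif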
// SPDX-License-Identifier: GPL-2.0-or-later /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ #include <linux/ethtool.h> #include <net/netdev_lock.h> #include "ipvlan.h" static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, struct netlink_ext_ack *extack) { struct ipvl_dev *ipvlan; unsigned int flags; int err; ASSERT_RTNL(); if (port->mode != nval) { list_for_each_entry(ipvlan, &port->ipvlans, pnode) { flags = ipvlan->dev->flags; if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { err = dev_change_flags(ipvlan->dev, flags | IFF_NOARP, extack); } else { err = dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP, extack); } if (unlikely(err)) goto fail; } if (nval == IPVLAN_MODE_L3S) { /* New mode is L3S */ err = ipvlan_l3s_register(port); if (err) goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ ipvlan_l3s_unregister(port); } port->mode = nval; } return 0; fail: /* Undo the flags changes that have been done so far.
*/ list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { flags = ipvlan->dev->flags; if (port->mode == IPVLAN_MODE_L3 || port->mode == IPVLAN_MODE_L3S) dev_change_flags(ipvlan->dev, flags | IFF_NOARP, NULL); else dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP, NULL); } return err; } static int ipvlan_port_create(struct net_device *dev) { struct ipvl_port *port; int err, idx; port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); if (!port) return -ENOMEM; write_pnet(&port->pnet, dev_net(dev)); port->dev = dev; port->mode = IPVLAN_MODE_L3; INIT_LIST_HEAD(&port->ipvlans); for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) INIT_HLIST_HEAD(&port->hlhead[idx]); skb_queue_head_init(&port->backlog); INIT_WORK(&port->wq, ipvlan_process_multicast); ida_init(&port->ida); port->dev_id_start = 1; err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); if (err) goto err; netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); return 0; err: kfree(port); return err; } static void ipvlan_port_destroy(struct net_device *dev) { struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; netdev_put(dev, &port->dev_tracker); if (port->mode == IPVLAN_MODE_L3S) ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); while ((skb = __skb_dequeue(&port->backlog)) != NULL) { dev_put(skb->dev); kfree_skb(skb); } ida_destroy(&port->ida); kfree(port); } #define IPVLAN_ALWAYS_ON_OFLOADS \ (NETIF_F_SG | NETIF_F_HW_CSUM | \ NETIF_F_GSO_ROBUST | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL) #define IPVLAN_ALWAYS_ON \ (IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_VLAN_CHALLENGED) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \ NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) /* NETIF_F_GSO_ENCAP_ALL NETIF_F_GSO_SOFTWARE Newly added */ #define IPVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) static int ipvlan_init(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_port *port; int err; dev->state = (dev->state & ~IPVLAN_STATE_MASK) | (phy_dev->state & IPVLAN_STATE_MASK); dev->features = phy_dev->features & IPVLAN_FEATURES; dev->features |= IPVLAN_ALWAYS_ON; dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES; dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS; dev->hw_enc_features |= dev->features; dev->lltx = true; netif_inherit_tso_max(dev, phy_dev); dev->hard_header_len = phy_dev->hard_header_len; netdev_lockdep_set_classes(dev); ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); if (!ipvlan->pcpu_stats) return -ENOMEM; if (!netif_is_ipvlan_port(phy_dev)) { err = ipvlan_port_create(phy_dev); if (err < 0) { free_percpu(ipvlan->pcpu_stats); return err; } } port = ipvlan_port_get_rtnl(phy_dev); port->count += 1; return 0; } static void ipvlan_uninit(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_port *port; free_percpu(ipvlan->pcpu_stats); port = ipvlan_port_get_rtnl(phy_dev); port->count -= 1; if (!port->count) ipvlan_port_destroy(port->dev); } static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; rcu_read_lock(); 
list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); rcu_read_unlock(); return 0; } static int ipvlan_stop(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); rcu_read_lock(); list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); rcu_read_unlock(); return 0; } static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); int skblen = skb->len; int ret; ret = ipvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct ipvl_pcpu_stats *pcptr; pcptr = this_cpu_ptr(ipvlan->pcpu_stats); u64_stats_update_begin(&pcptr->syncp); u64_stats_inc(&pcptr->tx_pkts); u64_stats_add(&pcptr->tx_bytes, skblen); u64_stats_update_end(&pcptr->syncp); } else { this_cpu_inc(ipvlan->pcpu_stats->tx_drps); } return ret; } static netdev_features_t ipvlan_fix_features(struct net_device *dev, netdev_features_t features) { struct ipvl_dev *ipvlan = netdev_priv(dev); features |= NETIF_F_ALL_FOR_ALL; features &= (ipvlan->sfeatures | ~IPVLAN_FEATURES); features = netdev_increment_features(ipvlan->phy_dev->features, features, features); features |= IPVLAN_ALWAYS_ON; features &= (IPVLAN_FEATURES | IPVLAN_ALWAYS_ON); return features; } static void ipvlan_change_rx_flags(struct net_device *dev, int change) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; if (change & IFF_ALLMULTI) dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); } static void ipvlan_set_multicast_mac_filter(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); } else { struct netdev_hw_addr *ha; DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); netdev_for_each_mc_addr(ha, dev) __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); /* Turn-on broadcast bit irrespective of address family, * since broadcast is deferred to a work-queue, hence no * impact on fast-path processing. */ __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); bitmap_copy(ipvlan->mac_filters, mc_filters, IPVLAN_MAC_FILTER_SIZE); } dev_uc_sync(ipvlan->phy_dev, dev); dev_mc_sync(ipvlan->phy_dev, dev); } static void ipvlan_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s) { struct ipvl_dev *ipvlan = netdev_priv(dev); if (ipvlan->pcpu_stats) { struct ipvl_pcpu_stats *pcptr; u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; u32 rx_errs = 0, tx_drps = 0; u32 strt; int idx; for_each_possible_cpu(idx) { pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); do { strt = u64_stats_fetch_begin(&pcptr->syncp); rx_pkts = u64_stats_read(&pcptr->rx_pkts); rx_bytes = u64_stats_read(&pcptr->rx_bytes); rx_mcast = u64_stats_read(&pcptr->rx_mcast); tx_pkts = u64_stats_read(&pcptr->tx_pkts); tx_bytes = u64_stats_read(&pcptr->tx_bytes); } while (u64_stats_fetch_retry(&pcptr->syncp, strt)); s->rx_packets += rx_pkts; s->rx_bytes += rx_bytes; s->multicast += rx_mcast; s->tx_packets += tx_pkts; s->tx_bytes += tx_bytes; /* u32 values are updated without syncp protection. 
*/ rx_errs += READ_ONCE(pcptr->rx_errs); tx_drps += READ_ONCE(pcptr->tx_drps); } s->rx_errors = rx_errs; s->rx_dropped = rx_errs; s->tx_dropped = tx_drps; } s->tx_errors = DEV_STATS_READ(dev, tx_errors); } static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; return vlan_vid_add(phy_dev, proto, vid); } static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; vlan_vid_del(phy_dev, proto, vid); return 0; } static int ipvlan_get_iflink(const struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); return READ_ONCE(ipvlan->phy_dev->ifindex); } static const struct net_device_ops ipvlan_netdev_ops = { .ndo_init = ipvlan_init, .ndo_uninit = ipvlan_uninit, .ndo_open = ipvlan_open, .ndo_stop = ipvlan_stop, .ndo_start_xmit = ipvlan_start_xmit, .ndo_fix_features = ipvlan_fix_features, .ndo_change_rx_flags = ipvlan_change_rx_flags, .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, .ndo_get_stats64 = ipvlan_get_stats64, .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, .ndo_get_iflink = ipvlan_get_iflink, }; static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len) { const struct ipvl_dev *ipvlan = netdev_priv(dev); struct net_device *phy_dev = ipvlan->phy_dev; /* TODO Probably use a different field than dev_addr so that the * mac-address on the virtual device is portable and can be carried * while the packets use the mac-addr on the physical device. */ return dev_hard_header(skb, phy_dev, type, daddr, saddr ? 
: phy_dev->dev_addr, len); } static const struct header_ops ipvlan_header_ops = { .create = ipvlan_hard_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, .parse_protocol = eth_header_parse_protocol, }; static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu; } static bool netif_is_ipvlan(const struct net_device *dev) { /* both ipvlan and ipvtap devices use the same netdev_ops */ return dev->netdev_ops == &ipvlan_netdev_ops; } static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { const struct ipvl_dev *ipvlan = netdev_priv(dev); return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd); } static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); strscpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); } static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); return ipvlan->msg_enable; } static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) { struct ipvl_dev *ipvlan = netdev_priv(dev); ipvlan->msg_enable = value; } static const struct ethtool_ops ipvlan_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = ipvlan_ethtool_get_link_ksettings, .get_drvinfo = ipvlan_ethtool_get_drvinfo, .get_msglevel = ipvlan_ethtool_get_msglevel, .set_msglevel = ipvlan_ethtool_set_msglevel, }; static int ipvlan_nl_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); int err = 0; if (!data) return 0; if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; if (data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); err = ipvlan_set_port_mode(port, nmode, extack); } if (!err && data[IFLA_IPVLAN_FLAGS]) { u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); if (flags & IPVLAN_F_PRIVATE) ipvlan_mark_private(port); else ipvlan_clear_private(port); if (flags & IPVLAN_F_VEPA) ipvlan_mark_vepa(port); else ipvlan_clear_vepa(port); } return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) { return (0 + nla_total_size(2) /* IFLA_IPVLAN_MODE */ + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */ ); } static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (!data) return 0; if (data[IFLA_IPVLAN_MODE]) { u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); if (mode >= IPVLAN_MODE_MAX) return -EINVAL; } if (data[IFLA_IPVLAN_FLAGS]) { u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); /* Only two bits are used at this moment. */ if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) return -EINVAL; /* Also both flags can't be active at the same time. 
*/ if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) == (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) return -EINVAL; } return 0; } static int ipvlan_nl_fillinfo(struct sk_buff *skb, const struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); int ret = -EINVAL; if (!port) goto err; ret = -EMSGSIZE; if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) goto err; if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags)) goto err; return 0; err: return ret; } int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct ipvl_dev *ipvlan = netdev_priv(dev); struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct ipvl_port *port; struct net_device *phy_dev; int err; u16 mode = IPVLAN_MODE_L3; if (!tb[IFLA_LINK]) return -EINVAL; phy_dev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); if (!phy_dev) return -ENODEV; if (netif_is_ipvlan(phy_dev)) { struct ipvl_dev *tmp = netdev_priv(phy_dev); phy_dev = tmp->phy_dev; if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; } else if (!netif_is_ipvlan_port(phy_dev)) { /* Exit early if the underlying link is invalid or busy */ if (phy_dev->type != ARPHRD_ETHER || phy_dev->flags & IFF_LOOPBACK) { netdev_err(phy_dev, "Master is either lo or non-ether device\n"); return -EINVAL; } if (netdev_is_rx_handler_busy(phy_dev)) { netdev_err(phy_dev, "Device is already in use.\n"); return -EBUSY; } } ipvlan->phy_dev = phy_dev; ipvlan->dev = dev; ipvlan->sfeatures = IPVLAN_FEATURES; if (!tb[IFLA_MTU]) ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); spin_lock_init(&ipvlan->addrs_lock); /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing * packets. */ eth_hw_addr_set(dev, phy_dev->dev_addr); dev->priv_flags |= IFF_NO_RX_HANDLER; err = register_netdevice(dev); if (err < 0) return err; /* ipvlan_init() would have created the port, if required */ port = ipvlan_port_get_rtnl(phy_dev); ipvlan->port = port; /* If the port-id base is at the MAX value, then wrap it around and * begin from 0x1 again. This may be due to a busy system where lots * of slaves are getting created and deleted. */ if (port->dev_id_start == 0xFFFE) port->dev_id_start = 0x1; /* Since L2 address is shared among all IPvlan slaves including * master, use unique 16 bit dev-ids to differentiate among them. * Assign IDs between 0x1 and 0xFFFE (used by the master) to each * slave link [see addrconf_ifid_eui48()]. */ err = ida_alloc_range(&port->ida, port->dev_id_start, 0xFFFD, GFP_KERNEL); if (err < 0) err = ida_alloc_range(&port->ida, 0x1, port->dev_id_start - 1, GFP_KERNEL); if (err < 0) goto unregister_netdev; dev->dev_id = err; /* Increment id-base to the next slot for the future assignment */ port->dev_id_start = err + 1; err = netdev_upper_dev_link(phy_dev, dev, extack); if (err) goto remove_ida; /* Flags are per port and latest update overrides. User has * to be consistent in setting it just like the mode attribute. 
*/ if (data && data[IFLA_IPVLAN_FLAGS]) port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); if (data && data[IFLA_IPVLAN_MODE]) mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); err = ipvlan_set_port_mode(port, mode, extack); if (err) goto unlink_netdev; list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); netif_stacked_transfer_operstate(phy_dev, dev); return 0; unlink_netdev: netdev_upper_dev_unlink(phy_dev, dev); remove_ida: ida_free(&port->ida, dev->dev_id); unregister_netdev: unregister_netdevice(dev); return err; } EXPORT_SYMBOL_GPL(ipvlan_link_new); void ipvlan_link_delete(struct net_device *dev, struct list_head *head) { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; spin_lock_bh(&ipvlan->addrs_lock); list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); kfree_rcu(addr, rcu); } spin_unlock_bh(&ipvlan->addrs_lock); ida_free(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); unregister_netdevice_queue(dev, head); netdev_upper_dev_unlink(ipvlan->phy_dev, dev); } EXPORT_SYMBOL_GPL(ipvlan_link_delete); void ipvlan_link_setup(struct net_device *dev) { ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; dev->netdev_ops = &ipvlan_netdev_ops; dev->needs_free_netdev = true; dev->header_ops = &ipvlan_header_ops; dev->ethtool_ops = &ipvlan_ethtool_ops; } EXPORT_SYMBOL_GPL(ipvlan_link_setup); static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = { [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 }, }; static struct net *ipvlan_get_link_net(const struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); return dev_net(ipvlan->phy_dev); } static struct rtnl_link_ops ipvlan_link_ops = { .kind = "ipvlan", .priv_size = sizeof(struct ipvl_dev), .setup = ipvlan_link_setup, .newlink = ipvlan_link_new, .dellink = ipvlan_link_delete, .get_link_net = ipvlan_get_link_net, }; int ipvlan_link_register(struct rtnl_link_ops *ops) { ops->get_size = ipvlan_nl_getsize; ops->policy = ipvlan_nl_policy; ops->validate = ipvlan_nl_validate; ops->fill_info = ipvlan_nl_fillinfo; ops->changelink = ipvlan_nl_changelink; ops->maxtype = IFLA_IPVLAN_MAX; return rtnl_link_register(ops); } EXPORT_SYMBOL_GPL(ipvlan_link_register); static int ipvlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct netdev_notifier_pre_changeaddr_info *prechaddr_info; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipvl_dev *ipvlan, *next; struct ipvl_port *port; LIST_HEAD(lst_kill); int err; if (!netif_is_ipvlan_port(dev)) return NOTIFY_DONE; port = ipvlan_port_get_rtnl(dev); switch (event) { case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: list_for_each_entry(ipvlan, &port->ipvlans, pnode) netif_stacked_transfer_operstate(ipvlan->phy_dev, ipvlan->dev); break; case NETDEV_REGISTER: { struct net *oldnet, *newnet = dev_net(dev); oldnet = read_pnet(&port->pnet); if (net_eq(newnet, oldnet)) break; write_pnet(&port->pnet, newnet); if (port->mode == IPVLAN_MODE_L3S) ipvlan_migrate_l3s_hook(oldnet, newnet); break; } case NETDEV_UNREGISTER: if (dev->reg_state != NETREG_UNREGISTERING) break; list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, &lst_kill); unregister_netdevice_many(&lst_kill); break; case 
NETDEV_FEAT_CHANGE: list_for_each_entry(ipvlan, &port->ipvlans, pnode) { netif_inherit_tso_max(ipvlan->dev, dev); netdev_update_features(ipvlan->dev); } break; case NETDEV_CHANGEMTU: list_for_each_entry(ipvlan, &port->ipvlans, pnode) ipvlan_adjust_mtu(ipvlan, dev); break; case NETDEV_PRE_CHANGEADDR: prechaddr_info = ptr; list_for_each_entry(ipvlan, &port->ipvlans, pnode) { err = dev_pre_changeaddr_notify(ipvlan->dev, prechaddr_info->dev_addr, extack); if (err) return notifier_from_errno(err); } break; case NETDEV_CHANGEADDR: list_for_each_entry(ipvlan, &port->ipvlans, pnode) { eth_hw_addr_set(ipvlan->dev, dev->dev_addr); call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); } break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlying device to change its type. */ return NOTIFY_BAD; case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: case NETDEV_RESEND_IGMP: list_for_each_entry(ipvlan, &port->ipvlans, pnode) call_netdevice_notifiers(event, ipvlan->dev); } return NOTIFY_DONE; } /* the caller must held the addrs lock */ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); if (!addr) return -ENOMEM; addr->master = ipvlan; if (!is_v6) { memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr)); addr->atype = IPVL_IPV4; #if IS_ENABLED(CONFIG_IPV6) } else { memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr)); addr->atype = IPVL_IPV6; #endif } list_add_tail_rcu(&addr->anode, &ipvlan->addrs); /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. */ if (netif_running(ipvlan->dev)) ipvlan_ht_addr_add(ipvlan, addr); return 0; } static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; spin_lock_bh(&ipvlan->addrs_lock); addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); if (!addr) { spin_unlock_bh(&ipvlan->addrs_lock); return; } ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); spin_unlock_bh(&ipvlan->addrs_lock); kfree_rcu(addr, rcu); } static bool ipvlan_is_valid_dev(const struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); if (!netif_is_ipvlan(dev)) return false; if (!ipvlan || !ipvlan->port) return false; return true; } #if IS_ENABLED(CONFIG_IPV6) static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { int ret = -EINVAL; spin_lock_bh(&ipvlan->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip6_addr, true); spin_unlock_bh(&ipvlan->addrs_lock); return ret; } static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { return ipvlan_del_addr(ipvlan, ip6_addr, true); } static int ipvlan_addr6_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; struct net_device *dev = (struct net_device *)if6->idev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: if (ipvlan_add_addr6(ipvlan, &if6->addr)) return NOTIFY_BAD; break; case NETDEV_DOWN: ipvlan_del_addr6(ipvlan, &if6->addr); break; } return NOTIFY_OK; } static int ipvlan_addr6_validator_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; struct net_device *dev = (struct net_device 
*)i6vi->i6vi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { NL_SET_ERR_MSG(i6vi->extack, "Address already assigned to an ipvlan device"); return notifier_from_errno(-EADDRINUSE); } break; } return NOTIFY_OK; } #endif static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { int ret = -EINVAL; spin_lock_bh(&ipvlan->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip4_addr, false); spin_unlock_bh(&ipvlan->addrs_lock); return ret; } static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { return ipvlan_del_addr(ipvlan, ip4_addr, false); } static int ipvlan_addr4_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); struct in_addr ip4_addr; if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: ip4_addr.s_addr = if4->ifa_address; if (ipvlan_add_addr4(ipvlan, &ip4_addr)) return NOTIFY_BAD; break; case NETDEV_DOWN: ip4_addr.s_addr = if4->ifa_address; ipvlan_del_addr4(ipvlan, &ip4_addr); break; } return NOTIFY_OK; } static int ipvlan_addr4_validator_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct in_validator_info *ivi = (struct in_validator_info *)ptr; struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { NL_SET_ERR_MSG(ivi->extack, "Address already assigned to an ipvlan device"); return notifier_from_errno(-EADDRINUSE); } break; } return NOTIFY_OK; } static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { .notifier_call = ipvlan_addr4_event, }; static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = { .notifier_call = ipvlan_addr4_validator_event, }; static struct notifier_block ipvlan_notifier_block __read_mostly = { .notifier_call = ipvlan_device_event, }; #if IS_ENABLED(CONFIG_IPV6) static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { .notifier_call = ipvlan_addr6_event, }; static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { .notifier_call = ipvlan_addr6_validator_event, }; #endif static int __init ipvlan_init_module(void) { int err; ipvlan_init_secret(); register_netdevice_notifier(&ipvlan_notifier_block); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&ipvlan_addr6_notifier_block); register_inet6addr_validator_notifier( &ipvlan_addr6_vtor_notifier_block); #endif register_inetaddr_notifier(&ipvlan_addr4_notifier_block); register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); err = ipvlan_l3s_init(); if (err < 0) goto error; err = ipvlan_link_register(&ipvlan_link_ops); if (err < 0) { ipvlan_l3s_cleanup(); goto error; } return 0; error: unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); unregister_inetaddr_validator_notifier( &ipvlan_addr4_vtor_notifier_block); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); unregister_inet6addr_validator_notifier( 
&ipvlan_addr6_vtor_notifier_block); #endif unregister_netdevice_notifier(&ipvlan_notifier_block); return err; } static void __exit ipvlan_cleanup_module(void) { rtnl_link_unregister(&ipvlan_link_ops); ipvlan_l3s_cleanup(); unregister_netdevice_notifier(&ipvlan_notifier_block); unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); unregister_inetaddr_validator_notifier( &ipvlan_addr4_vtor_notifier_block); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); unregister_inet6addr_validator_notifier( &ipvlan_addr6_vtor_notifier_block); #endif } module_init(ipvlan_init_module); module_exit(ipvlan_cleanup_module); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); MODULE_ALIAS_RTNL_LINK("ipvlan");
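/*
 * Illustration (not from the kernel tree): a minimal userspace C sketch of
 * the two-step dev_id allocation done in ipvlan_link_new() above -- try the
 * range [dev_id_start, 0xFFFD] first, then wrap around and retry
 * [0x1, dev_id_start - 1].  toy_ida_alloc_range() is a hypothetical
 * stand-in for the kernel's ida_alloc_range(), just enough to make the
 * sketch self-contained and runnable.
 */
#include <stdio.h>

#define TOY_ID_MAX 0xFFFE

static unsigned char toy_ida[TOY_ID_MAX + 1];	/* 1 = id allocated */

/* Return the lowest free id in [min, max] inclusive, or -1 if full. */
static int toy_ida_alloc_range(unsigned int min, unsigned int max)
{
	for (unsigned int id = min; id <= max; id++) {
		if (!toy_ida[id]) {
			toy_ida[id] = 1;
			return (int)id;
		}
	}
	return -1;
}

/* Mirrors the dev_id selection logic in ipvlan_link_new(). */
static int toy_alloc_dev_id(unsigned int *dev_id_start)
{
	int id;

	/* 0xFFFE is used by the master; wrap the hint before it gets there. */
	if (*dev_id_start == 0xFFFE)
		*dev_id_start = 0x1;

	id = toy_ida_alloc_range(*dev_id_start, 0xFFFD);
	if (id < 0)
		id = toy_ida_alloc_range(0x1, *dev_id_start - 1);
	if (id < 0)
		return -1;

	*dev_id_start = id + 1;	/* next slave starts at the next slot */
	return id;
}

int main(void)
{
	unsigned int dev_id_start = 1;

	for (int i = 0; i < 4; i++)
		printf("slave %d: dev_id %#x\n", i,
		       (unsigned)toy_alloc_dev_id(&dev_id_start));
	return 0;
}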
/*
 * linux/fs/nls/nls_euc-jp.c
 *
 * Added `OSF/JVC Recommended Code Set Conversion Specification
 * between Japanese EUC and Shift-JIS' support: <hirofumi@mail.parknet.co.jp>
 * (http://www.opengroup.or.jp/jvc/cde/sjis-euc-e.html)
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
#include <linux/errno.h>

static struct nls_table *p_nls;

#define IS_SJIS_LOW_BYTE(l)	((0x40 <= (l)) && ((l) <= 0xFC) && ((l) != 0x7F))
/* JIS X 0208 (includes NEC special characters) */
#define IS_SJIS_JISX0208(h, l)	((((0x81 <= (h)) && ((h) <= 0x9F)) \
				  || ((0xE0 <= (h)) && ((h) <= 0xEA))) \
				 && IS_SJIS_LOW_BYTE(l))
#define IS_SJIS_JISX0201KANA(c)	((0xA1 <= (c)) && ((c) <= 0xDF))
#define IS_SJIS_UDC_LOW(h, l)	(((0xF0 <= (h)) && ((h) <= 0xF4)) \
				 && IS_SJIS_LOW_BYTE(l))
#define IS_SJIS_UDC_HI(h, l)	(((0xF5 <= (h)) && ((h) <= 0xF9)) \
				 && IS_SJIS_LOW_BYTE(l))
#define IS_SJIS_IBM(h, l)	(((0xFA <= (h)) && ((h) <= 0xFC)) \
				 && IS_SJIS_LOW_BYTE(l))
#define IS_SJIS_NECIBM(h, l)	(((0xED <= (h)) && ((h) <= 0xEE)) \
				 && IS_SJIS_LOW_BYTE(l))
/* Expands to a single statement via the do { ... } while (0) idiom. */
#define MAP_SJIS2EUC(sjis_hi, sjis_lo, sjis_p, euc_hi, euc_lo, euc_p) do { \
	if ((sjis_lo) >= 0x9F) { \
		(euc_hi) = (sjis_hi) * 2 - (((sjis_p) * 2 - (euc_p)) - 1); \
		(euc_lo) = (sjis_lo) + 2; \
	} \
else { \ (euc_hi) = (sjis_hi) * 2 - ((sjis_p) * 2 - (euc_p)); \ (euc_lo) = (sjis_lo) + ((sjis_lo) >= 0x7F ? 0x60 : 0x61); \ } \ } while(0) #define SS2 (0x8E) /* Single Shift 2 */ #define SS3 (0x8F) /* Single Shift 3 */ #define IS_EUC_BYTE(c) ((0xA1 <= (c)) && ((c) <= 0xFE)) #define IS_EUC_JISX0208(h, l) (IS_EUC_BYTE(h) && IS_EUC_BYTE(l)) #define IS_EUC_JISX0201KANA(h, l) (((h) == SS2) && (0xA1 <= (l) && (l) <= 0xDF)) #define IS_EUC_UDC_LOW(h, l) (((0xF5 <= (h)) && ((h) <= 0xFE)) \ && IS_EUC_BYTE(l)) #define IS_EUC_UDC_HI(h, l) IS_EUC_UDC_LOW(h, l) /* G3 block */ #define MAP_EUC2SJIS(euc_hi, euc_lo, euc_p, sjis_hi, sjis_lo, sjis_p) { \ if ((euc_hi) & 1) { \ (sjis_hi) = (euc_hi) / 2 + ((sjis_p) - (euc_p) / 2); \ (sjis_lo) = (euc_lo) - ((euc_lo) >= 0xE0 ? 0x60 : 0x61); \ } else { \ (sjis_hi) = (euc_hi) / 2 + (((sjis_p) - (euc_p) / 2) - 1); \ (sjis_lo) = (euc_lo) - 2; \ } \ } while(0) /* SJIS IBM extended characters to EUC map */ static const unsigned char sjisibm2euc_map[][2] = { {0xF3, 0xF3}, {0xF3, 0xF4}, {0xF3, 0xF5}, {0xF3, 0xF6}, {0xF3, 0xF7}, {0xF3, 0xF8}, {0xF3, 0xF9}, {0xF3, 0xFA}, {0xF3, 0xFB}, {0xF3, 0xFC}, {0xF3, 0xFD}, {0xF3, 0xFE}, {0xF4, 0xA1}, {0xF4, 0xA2}, {0xF4, 0xA3}, {0xF4, 0xA4}, {0xF4, 0xA5}, {0xF4, 0xA6}, {0xF4, 0xA7}, {0xF4, 0xA8}, {0xA2, 0xCC}, {0xA2, 0xC3}, {0xF4, 0xA9}, {0xF4, 0xAA}, {0xF4, 0xAB}, {0xF4, 0xAC}, {0xF4, 0xAD}, {0xA2, 0xE8}, {0xD4, 0xE3}, {0xDC, 0xDF}, {0xE4, 0xE9}, {0xE3, 0xF8}, {0xD9, 0xA1}, {0xB1, 0xBB}, {0xF4, 0xAE}, {0xC2, 0xAD}, {0xC3, 0xFC}, {0xE4, 0xD0}, {0xC2, 0xBF}, {0xBC, 0xF4}, {0xB0, 0xA9}, {0xB0, 0xC8}, {0xF4, 0xAF}, {0xB0, 0xD2}, {0xB0, 0xD4}, {0xB0, 0xE3}, {0xB0, 0xEE}, {0xB1, 0xA7}, {0xB1, 0xA3}, {0xB1, 0xAC}, {0xB1, 0xA9}, {0xB1, 0xBE}, {0xB1, 0xDF}, {0xB1, 0xD8}, {0xB1, 0xC8}, {0xB1, 0xD7}, {0xB1, 0xE3}, {0xB1, 0xF4}, {0xB1, 0xE1}, {0xB2, 0xA3}, {0xF4, 0xB0}, {0xB2, 0xBB}, {0xB2, 0xE6}, {0x00, 0x00}, {0xB2, 0xED}, {0xB2, 0xF5}, {0xB2, 0xFC}, {0xF4, 0xB1}, {0xB3, 0xB5}, {0xB3, 0xD8}, {0xB3, 0xDB}, {0xB3, 0xE5}, {0xB3, 0xEE}, {0xB3, 0xFB}, {0xF4, 0xB2}, {0xF4, 0xB3}, {0xB4, 0xC0}, {0xB4, 0xC7}, {0xB4, 0xD0}, {0xB4, 0xDE}, {0xF4, 0xB4}, {0xB5, 0xAA}, {0xF4, 0xB5}, {0xB5, 0xAF}, {0xB5, 0xC4}, {0xB5, 0xE8}, {0xF4, 0xB6}, {0xB7, 0xC2}, {0xB7, 0xE4}, {0xB7, 0xE8}, {0xB7, 0xE7}, {0xF4, 0xB7}, {0xF4, 0xB8}, {0xF4, 0xB9}, {0xB8, 0xCE}, {0xB8, 0xE1}, {0xB8, 0xF5}, {0xB8, 0xF7}, {0xB8, 0xF8}, {0xB8, 0xFC}, {0xB9, 0xAF}, {0xB9, 0xB7}, {0xBA, 0xBE}, {0xBA, 0xDB}, {0xCD, 0xAA}, {0xBA, 0xE1}, {0xF4, 0xBA}, {0xBA, 0xEB}, {0xBB, 0xB3}, {0xBB, 0xB8}, {0xF4, 0xBB}, {0xBB, 0xCA}, {0xF4, 0xBC}, {0xF4, 0xBD}, {0xBB, 0xD0}, {0xBB, 0xDE}, {0xBB, 0xF4}, {0xBB, 0xF5}, {0xBB, 0xF9}, {0xBC, 0xE4}, {0xBC, 0xED}, {0xBC, 0xFE}, {0xF4, 0xBE}, {0xBD, 0xC2}, {0xBD, 0xE7}, {0xF4, 0xBF}, {0xBD, 0xF0}, {0xBE, 0xB0}, {0xBE, 0xAC}, {0xF4, 0xC0}, {0xBE, 0xB3}, {0xBE, 0xBD}, {0xBE, 0xCD}, {0xBE, 0xC9}, {0xBE, 0xE4}, {0xBF, 0xA8}, {0xBF, 0xC9}, {0xC0, 0xC4}, {0xC0, 0xE4}, {0xC0, 0xF4}, {0xC1, 0xA6}, {0xF4, 0xC1}, {0xC1, 0xF5}, {0xC1, 0xFC}, {0xF4, 0xC2}, {0xC1, 0xF8}, {0xC2, 0xAB}, {0xC2, 0xA1}, {0xC2, 0xA5}, {0xF4, 0xC3}, {0xC2, 0xB8}, {0xC2, 0xBA}, {0xF4, 0xC4}, {0xC2, 0xC4}, {0xC2, 0xD2}, {0xC2, 0xD7}, {0xC2, 0xDB}, {0xC2, 0xDE}, {0xC2, 0xED}, {0xC2, 0xF0}, {0xF4, 0xC5}, {0xC3, 0xA1}, {0xC3, 0xB5}, {0xC3, 0xC9}, {0xC3, 0xB9}, {0xF4, 0xC6}, {0xC3, 0xD8}, {0xC3, 0xFE}, {0xF4, 0xC7}, {0xC4, 0xCC}, {0xF4, 0xC8}, {0xC4, 0xD9}, {0xC4, 0xEA}, {0xC4, 0xFD}, {0xF4, 0xC9}, {0xC5, 0xA7}, {0xC5, 0xB5}, {0xC5, 0xB6}, {0xF4, 0xCA}, {0xC5, 0xD5}, {0xC6, 0xB8}, {0xC6, 0xD7}, {0xC6, 0xE0}, 
{0xC6, 0xEA}, {0xC6, 0xE3}, {0xC7, 0xA1}, {0xC7, 0xAB}, {0xC7, 0xC7}, {0xC7, 0xC3}, {0xC7, 0xCB}, {0xC7, 0xCF}, {0xC7, 0xD9}, {0xF4, 0xCB}, {0xF4, 0xCC}, {0xC7, 0xE6}, {0xC7, 0xEE}, {0xC7, 0xFC}, {0xC7, 0xEB}, {0xC7, 0xF0}, {0xC8, 0xB1}, {0xC8, 0xE5}, {0xC8, 0xF8}, {0xC9, 0xA6}, {0xC9, 0xAB}, {0xC9, 0xAD}, {0xF4, 0xCD}, {0xC9, 0xCA}, {0xC9, 0xD3}, {0xC9, 0xE9}, {0xC9, 0xE3}, {0xC9, 0xFC}, {0xC9, 0xF4}, {0xC9, 0xF5}, {0xF4, 0xCE}, {0xCA, 0xB3}, {0xCA, 0xBD}, {0xCA, 0xEF}, {0xCA, 0xF1}, {0xCB, 0xAE}, {0xF4, 0xCF}, {0xCB, 0xCA}, {0xCB, 0xE6}, {0xCB, 0xEA}, {0xCB, 0xF0}, {0xCB, 0xF4}, {0xCB, 0xEE}, {0xCC, 0xA5}, {0xCB, 0xF9}, {0xCC, 0xAB}, {0xCC, 0xAE}, {0xCC, 0xAD}, {0xCC, 0xB2}, {0xCC, 0xC2}, {0xCC, 0xD0}, {0xCC, 0xD9}, {0xF4, 0xD0}, {0xCD, 0xBB}, {0xF4, 0xD1}, {0xCE, 0xBB}, {0xF4, 0xD2}, {0xCE, 0xBA}, {0xCE, 0xC3}, {0xF4, 0xD3}, {0xCE, 0xF2}, {0xB3, 0xDD}, {0xCF, 0xD5}, {0xCF, 0xE2}, {0xCF, 0xE9}, {0xCF, 0xED}, {0xF4, 0xD4}, {0xF4, 0xD5}, {0xF4, 0xD6}, {0x00, 0x00}, {0xF4, 0xD7}, {0xD0, 0xE5}, {0xF4, 0xD8}, {0xD0, 0xE9}, {0xD1, 0xE8}, {0xF4, 0xD9}, {0xF4, 0xDA}, {0xD1, 0xEC}, {0xD2, 0xBB}, {0xF4, 0xDB}, {0xD3, 0xE1}, {0xD3, 0xE8}, {0xD4, 0xA7}, {0xF4, 0xDC}, {0xF4, 0xDD}, {0xD4, 0xD4}, {0xD4, 0xF2}, {0xD5, 0xAE}, {0xF4, 0xDE}, {0xD7, 0xDE}, {0xF4, 0xDF}, {0xD8, 0xA2}, {0xD8, 0xB7}, {0xD8, 0xC1}, {0xD8, 0xD1}, {0xD8, 0xF4}, {0xD9, 0xC6}, {0xD9, 0xC8}, {0xD9, 0xD1}, {0xF4, 0xE0}, {0xF4, 0xE1}, {0xF4, 0xE2}, {0xF4, 0xE3}, {0xF4, 0xE4}, {0xDC, 0xD3}, {0xDD, 0xC8}, {0xDD, 0xD4}, {0xDD, 0xEA}, {0xDD, 0xFA}, {0xDE, 0xA4}, {0xDE, 0xB0}, {0xF4, 0xE5}, {0xDE, 0xB5}, {0xDE, 0xCB}, {0xF4, 0xE6}, {0xDF, 0xB9}, {0xF4, 0xE7}, {0xDF, 0xC3}, {0xF4, 0xE8}, {0xF4, 0xE9}, {0xE0, 0xD9}, {0xF4, 0xEA}, {0xF4, 0xEB}, {0xE1, 0xE2}, {0xF4, 0xEC}, {0xF4, 0xED}, {0xF4, 0xEE}, {0xE2, 0xC7}, {0xE3, 0xA8}, {0xE3, 0xA6}, {0xE3, 0xA9}, {0xE3, 0xAF}, {0xE3, 0xB0}, {0xE3, 0xAA}, {0xE3, 0xAB}, {0xE3, 0xBC}, {0xE3, 0xC1}, {0xE3, 0xBF}, {0xE3, 0xD5}, {0xE3, 0xD8}, {0xE3, 0xD6}, {0xE3, 0xDF}, {0xE3, 0xE3}, {0xE3, 0xE1}, {0xE3, 0xD4}, {0xE3, 0xE9}, {0xE4, 0xA6}, {0xE3, 0xF1}, {0xE3, 0xF2}, {0xE4, 0xCB}, {0xE4, 0xC1}, {0xE4, 0xC3}, {0xE4, 0xBE}, {0xF4, 0xEF}, {0xE4, 0xC0}, {0xE4, 0xC7}, {0xE4, 0xBF}, {0xE4, 0xE0}, {0xE4, 0xDE}, {0xE4, 0xD1}, {0xF4, 0xF0}, {0xE4, 0xDC}, {0xE4, 0xD2}, {0xE4, 0xDB}, {0xE4, 0xD4}, {0xE4, 0xFA}, {0xE4, 0xEF}, {0xE5, 0xB3}, {0xE5, 0xBF}, {0xE5, 0xC9}, {0xE5, 0xD0}, {0xE5, 0xE2}, {0xE5, 0xEA}, {0xE5, 0xEB}, {0xF4, 0xF1}, {0xF4, 0xF2}, {0xF4, 0xF3}, {0xE6, 0xE8}, {0xE6, 0xEF}, {0xE7, 0xAC}, {0xF4, 0xF4}, {0xE7, 0xAE}, {0xF4, 0xF5}, {0xE7, 0xB1}, {0xF4, 0xF6}, {0xE7, 0xB2}, {0xE8, 0xB1}, {0xE8, 0xB6}, {0xF4, 0xF7}, {0xF4, 0xF8}, {0xE8, 0xDD}, {0xF4, 0xF9}, {0xF4, 0xFA}, {0xE9, 0xD1}, {0xF4, 0xFB}, {0xE9, 0xED}, {0xEA, 0xCD}, {0xF4, 0xFC}, {0xEA, 0xDB}, {0xEA, 0xE6}, {0xEA, 0xEA}, {0xEB, 0xA5}, {0xEB, 0xFB}, {0xEB, 0xFA}, {0xF4, 0xFD}, {0xEC, 0xD6}, {0xF4, 0xFE}, }; #define IS_EUC_IBM2JISX0208(h, l) \ (((h) == 0xA2 && (l) == 0xCC) || ((h) == 0xA2 && (l) == 0xE8)) /* EUC to SJIS IBM extended characters map (G3 JIS X 0212 block) */ static struct { unsigned short euc; unsigned char sjis[2]; } euc2sjisibm_jisx0212_map[] = { {0xA2C3, {0xFA, 0x55}}, {0xB0A9, {0xFA, 0x68}}, {0xB0C8, {0xFA, 0x69}}, {0xB0D2, {0xFA, 0x6B}}, {0xB0D4, {0xFA, 0x6C}}, {0xB0E3, {0xFA, 0x6D}}, {0xB0EE, {0xFA, 0x6E}}, {0xB1A3, {0xFA, 0x70}}, {0xB1A7, {0xFA, 0x6F}}, {0xB1A9, {0xFA, 0x72}}, {0xB1AC, {0xFA, 0x71}}, {0xB1BB, {0xFA, 0x61}}, {0xB1BE, {0xFA, 0x73}}, {0xB1C8, {0xFA, 0x76}}, {0xB1D7, {0xFA, 0x77}}, {0xB1D8, {0xFA, 0x75}}, {0xB1DF, 
{0xFA, 0x74}}, {0xB1E1, {0xFA, 0x7A}}, {0xB1E3, {0xFA, 0x78}}, {0xB1F4, {0xFA, 0x79}}, {0xB2A3, {0xFA, 0x7B}}, {0xB2BB, {0xFA, 0x7D}}, {0xB2E6, {0xFA, 0x7E}}, {0xB2ED, {0xFA, 0x80}}, {0xB2F5, {0xFA, 0x81}}, {0xB2FC, {0xFA, 0x82}}, {0xB3B5, {0xFA, 0x84}}, {0xB3D8, {0xFA, 0x85}}, {0xB3DB, {0xFA, 0x86}}, {0xB3DD, {0xFB, 0x77}}, {0xB3E5, {0xFA, 0x87}}, {0xB3EE, {0xFA, 0x88}}, {0xB3FB, {0xFA, 0x89}}, {0xB4C0, {0xFA, 0x8C}}, {0xB4C7, {0xFA, 0x8D}}, {0xB4D0, {0xFA, 0x8E}}, {0xB4DE, {0xFA, 0x8F}}, {0xB5AA, {0xFA, 0x91}}, {0xB5AF, {0xFA, 0x93}}, {0xB5C4, {0xFA, 0x94}}, {0xB5E8, {0xFA, 0x95}}, {0xB7C2, {0xFA, 0x97}}, {0xB7E4, {0xFA, 0x98}}, {0xB7E7, {0xFA, 0x9A}}, {0xB7E8, {0xFA, 0x99}}, {0xB8CE, {0xFA, 0x9E}}, {0xB8E1, {0xFA, 0x9F}}, {0xB8F5, {0xFA, 0xA0}}, {0xB8F7, {0xFA, 0xA1}}, {0xB8F8, {0xFA, 0xA2}}, {0xB8FC, {0xFA, 0xA3}}, {0xB9AF, {0xFA, 0xA4}}, {0xB9B7, {0xFA, 0xA5}}, {0xBABE, {0xFA, 0xA6}}, {0xBADB, {0xFA, 0xA7}}, {0xBAE1, {0xFA, 0xA9}}, {0xBAEB, {0xFA, 0xAB}}, {0xBBB3, {0xFA, 0xAC}}, {0xBBB8, {0xFA, 0xAD}}, {0xBBCA, {0xFA, 0xAF}}, {0xBBD0, {0xFA, 0xB2}}, {0xBBDE, {0xFA, 0xB3}}, {0xBBF4, {0xFA, 0xB4}}, {0xBBF5, {0xFA, 0xB5}}, {0xBBF9, {0xFA, 0xB6}}, {0xBCE4, {0xFA, 0xB7}}, {0xBCED, {0xFA, 0xB8}}, {0xBCF4, {0xFA, 0x67}}, {0xBCFE, {0xFA, 0xB9}}, {0xBDC2, {0xFA, 0xBB}}, {0xBDE7, {0xFA, 0xBC}}, {0xBDF0, {0xFA, 0xBE}}, {0xBEAC, {0xFA, 0xC0}}, {0xBEB0, {0xFA, 0xBF}}, {0xBEB3, {0xFA, 0xC2}}, {0xBEBD, {0xFA, 0xC3}}, {0xBEC9, {0xFA, 0xC5}}, {0xBECD, {0xFA, 0xC4}}, {0xBEE4, {0xFA, 0xC6}}, {0xBFA8, {0xFA, 0xC7}}, {0xBFC9, {0xFA, 0xC8}}, {0xC0C4, {0xFA, 0xC9}}, {0xC0E4, {0xFA, 0xCA}}, {0xC0F4, {0xFA, 0xCB}}, {0xC1A6, {0xFA, 0xCC}}, {0xC1F5, {0xFA, 0xCE}}, {0xC1F8, {0xFA, 0xD1}}, {0xC1FC, {0xFA, 0xCF}}, {0xC2A1, {0xFA, 0xD3}}, {0xC2A5, {0xFA, 0xD4}}, {0xC2AB, {0xFA, 0xD2}}, {0xC2AD, {0xFA, 0x63}}, {0xC2B8, {0xFA, 0xD6}}, {0xC2BA, {0xFA, 0xD7}}, {0xC2BF, {0xFA, 0x66}}, {0xC2C4, {0xFA, 0xD9}}, {0xC2D2, {0xFA, 0xDA}}, {0xC2D7, {0xFA, 0xDB}}, {0xC2DB, {0xFA, 0xDC}}, {0xC2DE, {0xFA, 0xDD}}, {0xC2ED, {0xFA, 0xDE}}, {0xC2F0, {0xFA, 0xDF}}, {0xC3A1, {0xFA, 0xE1}}, {0xC3B5, {0xFA, 0xE2}}, {0xC3B9, {0xFA, 0xE4}}, {0xC3C9, {0xFA, 0xE3}}, {0xC3D8, {0xFA, 0xE6}}, {0xC3FC, {0xFA, 0x64}}, {0xC3FE, {0xFA, 0xE7}}, {0xC4CC, {0xFA, 0xE9}}, {0xC4D9, {0xFA, 0xEB}}, {0xC4EA, {0xFA, 0xEC}}, {0xC4FD, {0xFA, 0xED}}, {0xC5A7, {0xFA, 0xEF}}, {0xC5B5, {0xFA, 0xF0}}, {0xC5B6, {0xFA, 0xF1}}, {0xC5D5, {0xFA, 0xF3}}, {0xC6B8, {0xFA, 0xF4}}, {0xC6D7, {0xFA, 0xF5}}, {0xC6E0, {0xFA, 0xF6}}, {0xC6E3, {0xFA, 0xF8}}, {0xC6EA, {0xFA, 0xF7}}, {0xC7A1, {0xFA, 0xF9}}, {0xC7AB, {0xFA, 0xFA}}, {0xC7C3, {0xFA, 0xFC}}, {0xC7C7, {0xFA, 0xFB}}, {0xC7CB, {0xFB, 0x40}}, {0xC7CF, {0xFB, 0x41}}, {0xC7D9, {0xFB, 0x42}}, {0xC7E6, {0xFB, 0x45}}, {0xC7EB, {0xFB, 0x48}}, {0xC7EE, {0xFB, 0x46}}, {0xC7F0, {0xFB, 0x49}}, {0xC7FC, {0xFB, 0x47}}, {0xC8B1, {0xFB, 0x4A}}, {0xC8E5, {0xFB, 0x4B}}, {0xC8F8, {0xFB, 0x4C}}, {0xC9A6, {0xFB, 0x4D}}, {0xC9AB, {0xFB, 0x4E}}, {0xC9AD, {0xFB, 0x4F}}, {0xC9CA, {0xFB, 0x51}}, {0xC9D3, {0xFB, 0x52}}, {0xC9E3, {0xFB, 0x54}}, {0xC9E9, {0xFB, 0x53}}, {0xC9F4, {0xFB, 0x56}}, {0xC9F5, {0xFB, 0x57}}, {0xC9FC, {0xFB, 0x55}}, {0xCAB3, {0xFB, 0x59}}, {0xCABD, {0xFB, 0x5A}}, {0xCAEF, {0xFB, 0x5B}}, {0xCAF1, {0xFB, 0x5C}}, {0xCBAE, {0xFB, 0x5D}}, {0xCBCA, {0xFB, 0x5F}}, {0xCBE6, {0xFB, 0x60}}, {0xCBEA, {0xFB, 0x61}}, {0xCBEE, {0xFB, 0x64}}, {0xCBF0, {0xFB, 0x62}}, {0xCBF4, {0xFB, 0x63}}, {0xCBF9, {0xFB, 0x66}}, {0xCCA5, {0xFB, 0x65}}, {0xCCAB, {0xFB, 0x67}}, {0xCCAD, {0xFB, 0x69}}, {0xCCAE, {0xFB, 0x68}}, {0xCCB2, {0xFB, 0x6A}}, {0xCCC2, 
{0xFB, 0x6B}}, {0xCCD0, {0xFB, 0x6C}}, {0xCCD9, {0xFB, 0x6D}}, {0xCDAA, {0xFA, 0xA8}}, {0xCDBB, {0xFB, 0x6F}}, {0xCEBA, {0xFB, 0x73}}, {0xCEBB, {0xFB, 0x71}}, {0xCEC3, {0xFB, 0x74}}, {0xCEF2, {0xFB, 0x76}}, {0xCFD5, {0xFB, 0x78}}, {0xCFE2, {0xFB, 0x79}}, {0xCFE9, {0xFB, 0x7A}}, {0xCFED, {0xFB, 0x7B}}, {0xD0E5, {0xFB, 0x81}}, {0xD0E9, {0xFB, 0x83}}, {0xD1E8, {0xFB, 0x84}}, {0xD1EC, {0xFB, 0x87}}, {0xD2BB, {0xFB, 0x88}}, {0xD3E1, {0xFB, 0x8A}}, {0xD3E8, {0xFB, 0x8B}}, {0xD4A7, {0xFB, 0x8C}}, {0xD4D4, {0xFB, 0x8F}}, {0xD4E3, {0xFA, 0x5C}}, {0xD4F2, {0xFB, 0x90}}, {0xD5AE, {0xFB, 0x91}}, {0xD7DE, {0xFB, 0x93}}, {0xD8A2, {0xFB, 0x95}}, {0xD8B7, {0xFB, 0x96}}, {0xD8C1, {0xFB, 0x97}}, {0xD8D1, {0xFB, 0x98}}, {0xD8F4, {0xFB, 0x99}}, {0xD9A1, {0xFA, 0x60}}, {0xD9C6, {0xFB, 0x9A}}, {0xD9C8, {0xFB, 0x9B}}, {0xD9D1, {0xFB, 0x9C}}, {0xDCD3, {0xFB, 0xA2}}, {0xDCDF, {0xFA, 0x5D}}, {0xDDC8, {0xFB, 0xA3}}, {0xDDD4, {0xFB, 0xA4}}, {0xDDEA, {0xFB, 0xA5}}, {0xDDFA, {0xFB, 0xA6}}, {0xDEA4, {0xFB, 0xA7}}, {0xDEB0, {0xFB, 0xA8}}, {0xDEB5, {0xFB, 0xAA}}, {0xDECB, {0xFB, 0xAB}}, {0xDFB9, {0xFB, 0xAD}}, {0xDFC3, {0xFB, 0xAF}}, {0xE0D9, {0xFB, 0xB2}}, {0xE1E2, {0xFB, 0xB5}}, {0xE2C7, {0xFB, 0xB9}}, {0xE3A6, {0xFB, 0xBB}}, {0xE3A8, {0xFB, 0xBA}}, {0xE3A9, {0xFB, 0xBC}}, {0xE3AA, {0xFB, 0xBF}}, {0xE3AB, {0xFB, 0xC0}}, {0xE3AF, {0xFB, 0xBD}}, {0xE3B0, {0xFB, 0xBE}}, {0xE3BC, {0xFB, 0xC1}}, {0xE3BF, {0xFB, 0xC3}}, {0xE3C1, {0xFB, 0xC2}}, {0xE3D4, {0xFB, 0xCA}}, {0xE3D5, {0xFB, 0xC4}}, {0xE3D6, {0xFB, 0xC6}}, {0xE3D8, {0xFB, 0xC5}}, {0xE3DF, {0xFB, 0xC7}}, {0xE3E1, {0xFB, 0xC9}}, {0xE3E3, {0xFB, 0xC8}}, {0xE3E9, {0xFB, 0xCB}}, {0xE3F1, {0xFB, 0xCD}}, {0xE3F2, {0xFB, 0xCE}}, {0xE3F8, {0xFA, 0x5F}}, {0xE4A6, {0xFB, 0xCC}}, {0xE4BE, {0xFB, 0xD2}}, {0xE4BF, {0xFB, 0xD6}}, {0xE4C0, {0xFB, 0xD4}}, {0xE4C1, {0xFB, 0xD0}}, {0xE4C3, {0xFB, 0xD1}}, {0xE4C7, {0xFB, 0xD5}}, {0xE4CB, {0xFB, 0xCF}}, {0xE4D0, {0xFA, 0x65}}, {0xE4D1, {0xFB, 0xD9}}, {0xE4D2, {0xFB, 0xDC}}, {0xE4D4, {0xFB, 0xDE}}, {0xE4DB, {0xFB, 0xDD}}, {0xE4DC, {0xFB, 0xDB}}, {0xE4DE, {0xFB, 0xD8}}, {0xE4E0, {0xFB, 0xD7}}, {0xE4E9, {0xFA, 0x5E}}, {0xE4EF, {0xFB, 0xE0}}, {0xE4FA, {0xFB, 0xDF}}, {0xE5B3, {0xFB, 0xE1}}, {0xE5BF, {0xFB, 0xE2}}, {0xE5C9, {0xFB, 0xE3}}, {0xE5D0, {0xFB, 0xE4}}, {0xE5E2, {0xFB, 0xE5}}, {0xE5EA, {0xFB, 0xE6}}, {0xE5EB, {0xFB, 0xE7}}, {0xE6E8, {0xFB, 0xEB}}, {0xE6EF, {0xFB, 0xEC}}, {0xE7AC, {0xFB, 0xED}}, {0xE7AE, {0xFB, 0xEF}}, {0xE7B1, {0xFB, 0xF1}}, {0xE7B2, {0xFB, 0xF3}}, {0xE8B1, {0xFB, 0xF4}}, {0xE8B6, {0xFB, 0xF5}}, {0xE8DD, {0xFB, 0xF8}}, {0xE9D1, {0xFB, 0xFB}}, {0xE9ED, {0xFC, 0x40}}, {0xEACD, {0xFC, 0x41}}, {0xEADB, {0xFC, 0x43}}, {0xEAE6, {0xFC, 0x44}}, {0xEAEA, {0xFC, 0x45}}, {0xEBA5, {0xFC, 0x46}}, {0xEBFA, {0xFC, 0x48}}, {0xEBFB, {0xFC, 0x47}}, {0xECD6, {0xFC, 0x4A}}, }; /* EUC to SJIS IBM extended characters map (G3 Upper block) */ static const unsigned char euc2sjisibm_g3upper_map[][2] = { {0xFA, 0x40}, {0xFA, 0x41}, {0xFA, 0x42}, {0xFA, 0x43}, {0xFA, 0x44}, {0xFA, 0x45}, {0xFA, 0x46}, {0xFA, 0x47}, {0xFA, 0x48}, {0xFA, 0x49}, {0xFA, 0x4A}, {0xFA, 0x4B}, {0xFA, 0x4C}, {0xFA, 0x4D}, {0xFA, 0x4E}, {0xFA, 0x4F}, {0xFA, 0x50}, {0xFA, 0x51}, {0xFA, 0x52}, {0xFA, 0x53}, {0xFA, 0x56}, {0xFA, 0x57}, {0xFA, 0x58}, {0xFA, 0x59}, {0xFA, 0x5A}, {0xFA, 0x62}, {0xFA, 0x6A}, {0xFA, 0x7C}, {0xFA, 0x83}, {0xFA, 0x8A}, {0xFA, 0x8B}, {0xFA, 0x90}, {0xFA, 0x92}, {0xFA, 0x96}, {0xFA, 0x9B}, {0xFA, 0x9C}, {0xFA, 0x9D}, {0xFA, 0xAA}, {0xFA, 0xAE}, {0xFA, 0xB0}, {0xFA, 0xB1}, {0xFA, 0xBA}, {0xFA, 0xBD}, {0xFA, 0xC1}, {0xFA, 0xCD}, {0xFA, 0xD0}, {0xFA, 
0xD5}, {0xFA, 0xD8}, {0xFA, 0xE0}, {0xFA, 0xE5}, {0xFA, 0xE8}, {0xFA, 0xEA}, {0xFA, 0xEE}, {0xFA, 0xF2}, {0xFB, 0x43}, {0xFB, 0x44}, {0xFB, 0x50}, {0xFB, 0x58}, {0xFB, 0x5E}, {0xFB, 0x6E}, {0xFB, 0x70}, {0xFB, 0x72}, {0xFB, 0x75}, {0xFB, 0x7C}, {0xFB, 0x7D}, {0xFB, 0x7E}, {0xFB, 0x80}, {0xFB, 0x82}, {0xFB, 0x85}, {0xFB, 0x86}, {0xFB, 0x89}, {0xFB, 0x8D}, {0xFB, 0x8E}, {0xFB, 0x92}, {0xFB, 0x94}, {0xFB, 0x9D}, {0xFB, 0x9E}, {0xFB, 0x9F}, {0xFB, 0xA0}, {0xFB, 0xA1}, {0xFB, 0xA9}, {0xFB, 0xAC}, {0xFB, 0xAE}, {0xFB, 0xB0}, {0xFB, 0xB1}, {0xFB, 0xB3}, {0xFB, 0xB4}, {0xFB, 0xB6}, {0xFB, 0xB7}, {0xFB, 0xB8}, {0xFB, 0xD3}, {0xFB, 0xDA}, {0xFB, 0xE8}, {0xFB, 0xE9}, {0xFB, 0xEA}, {0xFB, 0xEE}, {0xFB, 0xF0}, {0xFB, 0xF2}, {0xFB, 0xF6}, {0xFB, 0xF7}, {0xFB, 0xF9}, {0xFB, 0xFA}, {0xFB, 0xFC}, {0xFC, 0x42}, {0xFC, 0x49}, {0xFC, 0x4B}, }; static inline int sjisibm2euc(unsigned char *euc, const unsigned char sjis_hi, const unsigned char sjis_lo); static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo); static inline int euc2sjisibm_g3upper(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo); static inline int euc2sjisibm(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo); static inline int sjisnec2sjisibm(unsigned char *sjisibm, const unsigned char sjisnec_hi, const unsigned char sjisnec_lo); /* SJIS IBM extended characters to EUC */ static inline int sjisibm2euc(unsigned char *euc, const unsigned char sjis_hi, const unsigned char sjis_lo) { int index; index = ((sjis_hi - 0xFA) * (0xFD - 0x40)) + (sjis_lo - 0x40); if (IS_EUC_IBM2JISX0208(sjisibm2euc_map[index][0], sjisibm2euc_map[index][1])) { euc[0] = sjisibm2euc_map[index][0]; euc[1] = sjisibm2euc_map[index][1]; return 2; } else { euc[0] = SS3; euc[1] = sjisibm2euc_map[index][0]; euc[2] = sjisibm2euc_map[index][1]; return 3; } } /* EUC to SJIS IBM extended characters (G3 JIS X 0212 block) */ static inline int euc2sjisibm_jisx0212(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo) { int index, min_index, max_index; unsigned short euc; min_index = 0; max_index = ARRAY_SIZE(euc2sjisibm_jisx0212_map) - 1; euc = (euc_hi << 8) | euc_lo; while (min_index <= max_index) { index = (min_index + max_index) / 2; if (euc < euc2sjisibm_jisx0212_map[index].euc) max_index = index - 1; else min_index = index + 1; if (euc == euc2sjisibm_jisx0212_map[index].euc) { sjis[0] = euc2sjisibm_jisx0212_map[index].sjis[0]; sjis[1] = euc2sjisibm_jisx0212_map[index].sjis[1]; return 3; } } return 0; } /* EUC to SJIS IBM extended characters (G3 Upper block) */ static inline int euc2sjisibm_g3upper(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo) { int index; if (euc_hi == 0xF3) index = ((euc_hi << 8) | euc_lo) - 0xF3F3; else index = ((euc_hi << 8) | euc_lo) - 0xF4A1 + 12; if ((index < 0) || (index >= ARRAY_SIZE(euc2sjisibm_g3upper_map))) return 0; sjis[0] = euc2sjisibm_g3upper_map[index][0]; sjis[1] = euc2sjisibm_g3upper_map[index][1]; return 3; } /* EUC to SJIS IBM extended characters (G3 block) */ static inline int euc2sjisibm(unsigned char *sjis, const unsigned char euc_hi, const unsigned char euc_lo) { int n; #if 0 if ((euc_hi == 0xA2) && (euc_lo == 0xCC)) { sjis[0] = 0xFA; sjis[1] = 0x54; return 2; } else if ((euc_hi == 0xA2) && (euc_lo == 0xE8)) { sjis[0] = 0xFA; sjis[1] = 0x5B; return 2; } #endif if ((n = euc2sjisibm_g3upper(sjis, euc_hi, euc_lo))) { return n; } else if ((n = euc2sjisibm_jisx0212(sjis, euc_hi, 
euc_lo))) { return n; } return 0; } /* NEC/IBM extended characters to IBM extended characters */ static inline int sjisnec2sjisibm(unsigned char *sjisibm, const unsigned char sjisnec_hi, const unsigned char sjisnec_lo) { int count; if (! IS_SJIS_NECIBM(sjisnec_hi, sjisnec_lo)) return 0; if ((sjisnec_hi == 0xEE) && (sjisnec_lo == 0xF9)) { sjisibm[0] = 0x81; sjisibm[1] = 0xCA; return 2; } if ((sjisnec_hi == 0xEE) && (sjisnec_lo >= 0xEF)) { count = (sjisnec_hi << 8 | sjisnec_lo) - (sjisnec_lo <= 0xF9 ? 0xEEEF : (0xEEEF - 10)); } else { count = (sjisnec_hi - 0xED) * (0xFC - 0x40) + (sjisnec_lo - 0x40) + (0x5C - 0x40); if (sjisnec_lo >= 0x7F) count--; } sjisibm[0] = 0xFA + (count / (0xFC - 0x40)); sjisibm[1] = 0x40 + (count % (0xFC - 0x40)); if (sjisibm[1] >= 0x7F) sjisibm[1]++; return 2; } static int uni2char(const wchar_t uni, unsigned char *out, int boundlen) { int n; if (!p_nls) return -EINVAL; if ((n = p_nls->uni2char(uni, out, boundlen)) < 0) return n; /* translate SJIS into EUC-JP */ if (n == 1) { if (IS_SJIS_JISX0201KANA(out[0])) { /* JIS X 0201 KANA */ if (boundlen < 2) return -ENAMETOOLONG; out[1] = out[0]; out[0] = SS2; return 2; } } else if (n == 2) { /* NEC/IBM extended characters to IBM extended characters */ sjisnec2sjisibm(out, out[0], out[1]); if (IS_SJIS_UDC_LOW(out[0], out[1])) { /* User defined characters half low */ MAP_SJIS2EUC(out[0], out[1], 0xF0, out[0], out[1], 0xF5); } else if (IS_SJIS_UDC_HI(out[0], out[1])) { /* User defined characters half high */ unsigned char ch, cl; if (boundlen < 3) return -ENAMETOOLONG; n = 3; ch = out[0]; cl = out[1]; out[0] = SS3; MAP_SJIS2EUC(ch, cl, 0xF5, out[1], out[2], 0xF5); } else if (IS_SJIS_IBM(out[0], out[1])) { /* IBM extended characters */ unsigned char euc[3], i; n = sjisibm2euc(euc, out[0], out[1]); if (boundlen < n) return -ENAMETOOLONG; for (i = 0; i < n; i++) out[i] = euc[i]; } else if (IS_SJIS_JISX0208(out[0], out[1])) { /* JIS X 0208 (include NEC special characters) */ out[0] = (out[0]^0xA0)*2 + 0x5F; if (out[1] > 0x9E) out[0]++; if (out[1] < 0x7F) out[1] = out[1] + 0x61; else if (out[1] < 0x9F) out[1] = out[1] + 0x60; else out[1] = out[1] + 0x02; } else { /* Invalid characters */ return -EINVAL; } } else return -EINVAL; return n; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { unsigned char sjis_temp[2]; int euc_offset, n; if ( !p_nls ) return -EINVAL; if (boundlen <= 0) return -ENAMETOOLONG; /* translate EUC-JP into SJIS */ if (rawstring[0] > 0x7F) { if (rawstring[0] == SS3) { if (boundlen < 3) return -EINVAL; euc_offset = 3; if (IS_EUC_UDC_HI(rawstring[1], rawstring[2])) { /* User defined characters half high */ MAP_EUC2SJIS(rawstring[1], rawstring[2], 0xF5, sjis_temp[0], sjis_temp[1], 0xF5); } else if (euc2sjisibm(sjis_temp,rawstring[1],rawstring[2])) { /* IBM extended characters */ } else { /* JIS X 0212 and Invalid characters*/ return -EINVAL; /* 'GETA' with SJIS coding */ /* sjis_temp[0] = 0x81; */ /* sjis_temp[1] = 0xAC; */ } } else { if (boundlen < 2) return -EINVAL; euc_offset = 2; if (IS_EUC_JISX0201KANA(rawstring[0], rawstring[1])) { /* JIS X 0201 KANA */ sjis_temp[0] = rawstring[1]; sjis_temp[1] = 0x00; } else if (IS_EUC_UDC_LOW(rawstring[0], rawstring[1])) { /* User defined characters half low */ MAP_EUC2SJIS(rawstring[0], rawstring[1], 0xF5, sjis_temp[0], sjis_temp[1], 0xF0); } else if (IS_EUC_JISX0208(rawstring[0], rawstring[1])) { /* JIS X 0208 (include NEC spesial characters) */ sjis_temp[0] = ((rawstring[0]-0x5f)/2) ^ 0xA0; if (!(rawstring[0] & 1)) sjis_temp[1] = 
rawstring[1] - 0x02; else if (rawstring[1] < 0xE0) sjis_temp[1] = rawstring[1] - 0x61; else sjis_temp[1] = rawstring[1] - 0x60; } else { /* Invalid characters */ return -EINVAL; } } } else { euc_offset = 1; /* JIS X 0201 ROMAJI */ sjis_temp[0] = rawstring[0]; sjis_temp[1] = 0x00; } if ( (n = p_nls->char2uni(sjis_temp, sizeof(sjis_temp), uni)) < 0) return n; return euc_offset; } static struct nls_table table = { .charset = "euc-jp", .uni2char = uni2char, .char2uni = char2uni, }; static int __init init_nls_euc_jp(void) { p_nls = load_nls("cp932"); if (p_nls) { table.charset2upper = p_nls->charset2upper; table.charset2lower = p_nls->charset2lower; return register_nls(&table); } return -EINVAL; } static void __exit exit_nls_euc_jp(void) { unregister_nls(&table); unload_nls(p_nls); } module_init(init_nls_euc_jp) module_exit(exit_nls_euc_jp) MODULE_DESCRIPTION("NLS Japanese charset (EUC-JP)"); MODULE_LICENSE("Dual BSD/GPL");
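/*
 * Illustration (not from the kernel tree): a self-contained userspace C
 * round-trip of the JIS X 0208 arithmetic used by uni2char() and char2uni()
 * above, on one known code point: U+4E9C is 0x88 0x9F in Shift-JIS and
 * 0xB0 0xA1 in EUC-JP.  The helper names are illustration-only.
 */
#include <stdio.h>

/* Shift-JIS -> EUC-JP lead/trail transform, as in uni2char(). */
static void sjis2euc_jisx0208(unsigned char *hi, unsigned char *lo)
{
	*hi = (*hi ^ 0xA0) * 2 + 0x5F;
	if (*lo > 0x9E)		/* test the original trail byte first */
		(*hi)++;
	if (*lo < 0x7F)
		*lo += 0x61;
	else if (*lo < 0x9F)
		*lo += 0x60;
	else
		*lo += 0x02;
}

/* EUC-JP -> Shift-JIS lead/trail transform, as in char2uni(). */
static void euc2sjis_jisx0208(unsigned char *hi, unsigned char *lo)
{
	unsigned char h = *hi;	/* parity of the original EUC lead byte */

	*hi = ((h - 0x5F) / 2) ^ 0xA0;
	if (!(h & 1))
		*lo -= 0x02;
	else if (*lo < 0xE0)
		*lo -= 0x61;
	else
		*lo -= 0x60;
}

int main(void)
{
	unsigned char hi = 0x88, lo = 0x9F;

	sjis2euc_jisx0208(&hi, &lo);
	printf("EUC-JP:    %02X %02X\n", (unsigned)hi, (unsigned)lo); /* B0 A1 */
	euc2sjis_jisx0208(&hi, &lo);
	printf("Shift-JIS: %02X %02X\n", (unsigned)hi, (unsigned)lo); /* 88 9F */
	return 0;
}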
// SPDX-License-Identifier: GPL-2.0

#include "eytzinger.h"

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory. This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, size_t n)
{
	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory. This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible. If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI).
Are there any cases the kernel needs to worry about? */ static void swap_words_64(void *a, void *b, size_t n) { do { #ifdef CONFIG_64BIT u64 t = *(u64 *)(a + (n -= 8)); *(u64 *)(a + n) = *(u64 *)(b + n); *(u64 *)(b + n) = t; #else /* Use two 32-bit transfers to avoid base+index+4 addressing */ u32 t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); *(u32 *)(b + n) = t; t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); *(u32 *)(b + n) = t; #endif } while (n); } /** * swap_bytes - swap two elements a byte at a time * @a: pointer to the first element to swap * @b: pointer to the second element to swap * @n: element size * * This is the fallback if alignment doesn't allow using larger chunks. */ static void swap_bytes(void *a, void *b, size_t n) { do { char t = ((char *)a)[--n]; ((char *)a)[n] = ((char *)b)[n]; ((char *)b)[n] = t; } while (n); } /* * The values are arbitrary as long as they can't be confused with * a pointer, but small integers make for the smallest compare * instructions. */ #define SWAP_WORDS_64 (swap_r_func_t)0 #define SWAP_WORDS_32 (swap_r_func_t)1 #define SWAP_BYTES (swap_r_func_t)2 #define SWAP_WRAPPER (swap_r_func_t)3 struct wrapper { cmp_func_t cmp; swap_func_t swap_func; }; /* * The function pointer is last to make tail calls most efficient if the * compiler decides not to inline this function. */ static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) { if (swap_func == SWAP_WRAPPER) { ((const struct wrapper *)priv)->swap_func(a, b, (int)size); return; } if (swap_func == SWAP_WORDS_64) swap_words_64(a, b, size); else if (swap_func == SWAP_WORDS_32) swap_words_32(a, b, size); else if (swap_func == SWAP_BYTES) swap_bytes(a, b, size); else swap_func(a, b, (int)size, priv); } #define _CMP_WRAPPER ((cmp_r_func_t)0L) static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv) { if (cmp == _CMP_WRAPPER) return ((const struct wrapper *)priv)->cmp(a, b); return cmp(a, b, priv); } static inline int eytzinger1_do_cmp(void *base1, size_t n, size_t size, cmp_r_func_t cmp_func, const void *priv, size_t l, size_t r) { return do_cmp(base1 + inorder_to_eytzinger1(l, n) * size, base1 + inorder_to_eytzinger1(r, n) * size, cmp_func, priv); } static inline void eytzinger1_do_swap(void *base1, size_t n, size_t size, swap_r_func_t swap_func, const void *priv, size_t l, size_t r) { do_swap(base1 + inorder_to_eytzinger1(l, n) * size, base1 + inorder_to_eytzinger1(r, n) * size, size, swap_func, priv); } static void eytzinger1_sort_r(void *base1, size_t n, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, const void *priv) { unsigned i, j, k; /* called from 'sort' without swap function, let's pick the default */ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func) swap_func = NULL; if (!swap_func) { if (is_aligned(base1, size, 8)) swap_func = SWAP_WORDS_64; else if (is_aligned(base1, size, 4)) swap_func = SWAP_WORDS_32; else swap_func = SWAP_BYTES; } /* heapify */ for (i = n / 2; i >= 1; --i) { /* Find the sift-down path all the way to the leaves. */ for (j = i; k = j * 2, k < n;) j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1; /* Special case for the last leaf with no sibling. */ if (j * 2 == n) j *= 2; /* Backtrack to the correct location. */ while (j != i && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, i, j) >= 0) j /= 2; /* Shift the element into its correct place. 
*/ for (k = j; j != i;) { j /= 2; eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k); } } /* sort */ for (i = n; i > 1; --i) { eytzinger1_do_swap(base1, n, size, swap_func, priv, 1, i); /* Find the sift-down path all the way to the leaves. */ for (j = 1; k = j * 2, k + 1 < i;) j = eytzinger1_do_cmp(base1, n, size, cmp_func, priv, k, k + 1) > 0 ? k : k + 1; /* Special case for the last leaf with no sibling. */ if (j * 2 + 1 == i) j *= 2; /* Backtrack to the correct location. */ while (j >= 1 && eytzinger1_do_cmp(base1, n, size, cmp_func, priv, 1, j) >= 0) j /= 2; /* Shift the element into its correct place. */ for (k = j; j > 1;) { j /= 2; eytzinger1_do_swap(base1, n, size, swap_func, priv, j, k); } } } void eytzinger0_sort_r(void *base, size_t n, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, const void *priv) { void *base1 = base - size; return eytzinger1_sort_r(base1, n, size, cmp_func, swap_func, priv); } void eytzinger0_sort(void *base, size_t n, size_t size, cmp_func_t cmp_func, swap_func_t swap_func) { struct wrapper w = { .cmp = cmp_func, .swap_func = swap_func, }; return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); } #if 0 #include <linux/slab.h> #include <linux/random.h> #include <linux/ktime.h> static u64 cmp_count; static int mycmp(const void *a, const void *b) { u32 _a = *(u32 *)a; u32 _b = *(u32 *)b; cmp_count++; if (_a < _b) return -1; else if (_a > _b) return 1; else return 0; } static int test(void) { size_t N, i; ktime_t start, end; s64 delta; u32 *arr; for (N = 10000; N <= 100000; N += 10000) { arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL); cmp_count = 0; for (i = 0; i < N; i++) arr[i] = get_random_u32(); start = ktime_get(); eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL); end = ktime_get(); delta = ktime_us_delta(end, start); printk(KERN_INFO "time: %lld\n", delta); printk(KERN_INFO "comparisons: %lld\n", cmp_count); u32 prev = 0; eytzinger0_for_each(i, N) { if (prev > arr[i]) goto err; prev = arr[i]; } kfree(arr); } return 0; err: kfree(arr); return -1; } #endif
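/*
 * Illustration (not from the kernel tree): a userspace sketch of the
 * bottom-up heapsort variant that eytzinger1_sort_r() above implements,
 * on a plain 1-based array, i.e. without the inorder_to_eytzinger1()
 * index remapping.  Each sift first walks the path of larger children all
 * the way to a leaf, then backtracks to the right depth, costing roughly
 * one comparison per level instead of two.  sift() and swap_u32() are
 * illustration-only names; for usage of the real eytzinger0_sort() API,
 * see the #if 0 self-test above.
 */
#include <stdio.h>

static void swap_u32(unsigned *a, unsigned *b)
{
	unsigned t = *a; *a = *b; *b = t;
}

/* Sift v[i] down within the 1-based max-heap v[1..n]. */
static void sift(unsigned *v, unsigned i, unsigned n)
{
	unsigned j, k;

	/* Find the sift-down path all the way to the leaves. */
	for (j = i; (k = 2 * j) < n;)
		j = v[k] > v[k + 1] ? k : k + 1;

	/* Special case for the last leaf with no sibling. */
	if (2 * j == n)
		j = n;

	/* Backtrack to the correct location. */
	while (j > i && v[i] >= v[j])
		j /= 2;

	/* Shift the element into its correct place along the path. */
	for (k = j; j > i;) {
		j /= 2;
		swap_u32(&v[j], &v[k]);
	}
}

int main(void)
{
	unsigned v[] = { 0 /* slot 0 unused: 1-based */, 5, 1, 4, 2, 3 };
	unsigned n = 5, i;

	for (i = n / 2; i >= 1; i--)	/* heapify */
		sift(v, i, n);
	for (i = n; i > 1; i--) {	/* pop the max to the back */
		swap_u32(&v[1], &v[i]);
		sift(v, 1, i - 1);
	}
	for (i = 1; i <= n; i++)
		printf("%u ", v[i]);	/* prints: 1 2 3 4 5 */
	printf("\n");
	return 0;
}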
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Fast and scalable bitmaps.
 *
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 */

#ifndef __LINUX_SCALE_BITMAP_H
#define __LINUX_SCALE_BITMAP_H

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <linux/wait.h>

struct seq_file;

/**
 * struct sbitmap_word - Word in a &struct sbitmap.
 */
struct sbitmap_word {
	/**
	 * @word: word holding free bits
	 */
	unsigned long word;

	/**
	 * @cleared: word holding cleared bits
	 */
	unsigned long cleared ____cacheline_aligned_in_smp;

	/**
	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
	 */
	raw_spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;

/**
 * struct sbitmap - Scalable bitmap.
 *
 * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong.
 * This trades off higher memory usage for better scalability.
 */
struct sbitmap {
	/**
	 * @depth: Number of bits used in the whole bitmap.
	 */
	unsigned int depth;

	/**
	 * @shift: log2(number of bits used per word)
	 */
	unsigned int shift;

	/**
	 * @map_nr: Number of words (cachelines) being used for the bitmap.
	 */
	unsigned int map_nr;

	/**
	 * @round_robin: Allocate bits in strict round-robin order.
	 */
	bool round_robin;

	/**
	 * @map: Allocated bitmap.
	 */
	struct sbitmap_word *map;

	/**
	 * @alloc_hint: Cache of last successfully allocated or freed bit.
	 *
	 * This is per-cpu, which allows multiple users to stick to different
	 * cachelines until the map is exhausted.
	 */
	unsigned int __percpu *alloc_hint;
};

#define SBQ_WAIT_QUEUES 8
#define SBQ_WAKE_BATCH 8

/**
 * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue.
 */
struct sbq_wait_state {
	/**
	 * @wait: Wait queue.
	 */
	wait_queue_head_t wait;
} ____cacheline_aligned_in_smp;

/**
 * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free
 * bits.
 *
 * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to
 * avoid contention on the wait queue spinlock. This ensures that we don't hit a
 * scalability wall when we run out of free bits and have to start putting tasks
 * to sleep.
 */
struct sbitmap_queue {
	/**
	 * @sb: Scalable bitmap.
	 */
	struct sbitmap sb;

	/**
	 * @wake_batch: Number of bits which must be freed before we wake up any
	 * waiters.
	 */
	unsigned int wake_batch;

	/**
	 * @wake_index: Next wait queue in @ws to wake up.
	 */
	atomic_t wake_index;

	/**
	 * @ws: Wait queues.
	 */
	struct sbq_wait_state *ws;

	/**
	 * @ws_active: count of currently active ws waitqueues
	 */
	atomic_t ws_active;

	/**
	 * @min_shallow_depth: The minimum shallow depth which may be passed to
	 * sbitmap_queue_get_shallow()
	 */
	unsigned int min_shallow_depth;

	/**
	 * @completion_cnt: Number of cleared bits passed to the
	 * wakeup function.
	 */
	atomic_t completion_cnt;

	/**
	 * @wakeup_cnt: Number of thread wakeups issued.
	 */
	atomic_t wakeup_cnt;
};

/**
 * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node.
 * @sb: Bitmap to initialize.
 * @depth: Number of bits to allocate.
 * @shift: Use 2^@shift bits per word in the bitmap; if a negative number is
 *         given, a good default is chosen.
 * @flags: Allocation flags.
 * @node: Memory node to allocate on.
 * @round_robin: If true, be stricter about allocation order; always allocate
 *               starting from the last allocated bit. This is less efficient
 *               than the default behavior (false).
 * @alloc_hint: If true, apply percpu hint for where to start searching for
 *              a free bit.
 *
 * Return: Zero on success or negative errno on failure.
 */
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
		      gfp_t flags, int node, bool round_robin, bool alloc_hint);

/* sbitmap internal helper */
static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
{
	if (index == sb->map_nr - 1)
		return sb->depth - (index << sb->shift);

	return 1U << sb->shift;
}

/**
 * sbitmap_free() - Free memory used by a &struct sbitmap.
 * @sb: Bitmap to free.
 */
static inline void sbitmap_free(struct sbitmap *sb)
{
	free_percpu(sb->alloc_hint);
	kvfree(sb->map);
	sb->map = NULL;
}

/**
 * sbitmap_resize() - Resize a &struct sbitmap.
 * @sb: Bitmap to resize.
 * @depth: New number of bits to resize to.
 *
 * Doesn't reallocate anything. It's up to the caller to ensure that the new
 * depth doesn't exceed the depth that the sb was initialized with.
*/ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); /** * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. * @sb: Bitmap to allocate from. * * This operation provides acquire barrier semantics if it succeeds. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get(struct sbitmap *sb); /** * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. * @shallow_depth: The maximum number of bits to allocate from a single word. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. * * Return: true if any bit in the bitmap is set, false otherwise. */ bool sbitmap_any_bit_set(const struct sbitmap *sb); #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. * * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ static inline void __sbitmap_for_each_set(struct sbitmap *sb, unsigned int start, sb_for_each_fn fn, void *data) { unsigned int index; unsigned int nr; unsigned int scanned = 0; if (start >= sb->depth) start = 0; index = SB_NR_TO_INDEX(sb, start); nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { unsigned long word; unsigned int depth = min_t(unsigned int, __map_depth(sb, index) - nr, sb->depth - scanned); scanned += depth; word = sb->map[index].word & ~sb->map[index].cleared; if (!word) goto next; /* * On the first iteration of the outer loop, we need to add the * bit offset back to the size of the word for find_next_bit(). * On all other iterations, nr is zero, so this is a noop. */ depth += nr; while (1) { nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } next: nr = 0; if (++index >= sb->map_nr) index = 0; } } /** * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. 
 */
static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn,
					void *data)
{
	__sbitmap_for_each_set(sb, 0, fn, data);
}

static inline unsigned long *__sbitmap_word(struct sbitmap *sb,
					    unsigned int bitnr)
{
	return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word;
}

/* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */

static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr)
{
	set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
{
	clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

/*
 * This one is special, since it doesn't actually clear the bit, rather it
 * sets the corresponding bit in the ->cleared mask instead. Paired with
 * the caller doing sbitmap_deferred_clear() if a given index is full, which
 * will clear the previously freed entries in the corresponding ->word.
 */
static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
{
	unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;

	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
}

/*
 * Pairs with sbitmap_get(): this one applies both the cleared bit and the
 * allocation hint.
 */
static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr)
{
	sbitmap_deferred_clear_bit(sb, bitnr);

	if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth))
		*raw_cpu_ptr(sb->alloc_hint) = bitnr;
}

static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
{
	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

static inline int sbitmap_calculate_shift(unsigned int depth)
{
	int shift = ilog2(BITS_PER_LONG);

	/*
	 * If the bitmap is small, shrink the number of bits per word so
	 * we spread over a few cachelines, at least. If less than 4
	 * bits, just forget about it, it's not going to work optimally
	 * anyway.
	 */
	if (depth >= 4) {
		while ((4U << shift) > depth)
			shift--;
	}

	return shift;
}

/**
 * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
 * @sb: Bitmap to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The format may change at any time.
 */
void sbitmap_show(struct sbitmap *sb, struct seq_file *m);

/**
 * sbitmap_weight() - Return the number of set and not cleared bits in a
 * &struct sbitmap.
 * @sb: Bitmap to check.
 *
 * Return: The number of bits that are both set and not cleared.
 */
unsigned int sbitmap_weight(const struct sbitmap *sb);

/**
 * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
 * seq_file.
 * @sb: Bitmap to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The output isn't guaranteed to be internally
 * consistent.
 */
void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);

/**
 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
 * memory node.
 * @sbq: Bitmap queue to initialize.
 * @depth: See sbitmap_init_node().
 * @shift: See sbitmap_init_node().
 * @round_robin: See sbitmap_get().
 * @flags: Allocation flags.
 * @node: Memory node to allocate on.
 *
 * Return: Zero on success or negative errno on failure.
 */
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node);

/**
 * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue.
 *
 * @sbq: Bitmap queue to free.
 */
static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
{
	kfree(sbq->ws);
	sbitmap_free(&sbq->sb);
}

/**
 * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
 * @sbq: Bitmap queue whose wake batch is recalculated.
 * @users: Number of shares.
 *
 * Like sbitmap_queue_update_wake_batch(), this recalculates the wake batch
 * based on the depth. This interface is for HCTX shared tags or queue shared
 * tags.
 */
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
					  unsigned int users);

/**
 * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
 * @sbq: Bitmap queue to resize.
 * @depth: New number of bits to resize to.
 *
 * Like sbitmap_resize(), this doesn't reallocate anything. It has to do
 * some extra work on the &struct sbitmap_queue, so it's not safe to just
 * resize the underlying &struct sbitmap.
 */
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);

/**
 * __sbitmap_queue_get() - Try to allocate a free bit from a &struct
 * sbitmap_queue with preemption already disabled.
 * @sbq: Bitmap queue to allocate from.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int __sbitmap_queue_get(struct sbitmap_queue *sbq);

/**
 * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits
 * @sbq: Bitmap queue to allocate from.
 * @nr_tags: number of tags requested
 * @offset: offset to add to returned bits
 *
 * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is
 * a bit in the mask returned, and the caller must add @offset to the value to
 * get the absolute tag value.
 */
unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
					unsigned int *offset);

/**
 * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
 * sbitmap_queue, limiting the depth used from each word, with preemption
 * already disabled.
 * @sbq: Bitmap queue to allocate from.
 * @shallow_depth: The maximum number of bits to allocate from a single word.
 * See sbitmap_get_shallow().
 *
 * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
 * initializing @sbq.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
			      unsigned int shallow_depth);

/**
 * sbitmap_queue_get() - Try to allocate a free bit from a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to allocate from.
 * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
 *       sbitmap_queue_clear()).
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
				    unsigned int *cpu)
{
	int nr;

	*cpu = get_cpu();
	nr = __sbitmap_queue_get(sbq);
	put_cpu();
	return nr;
}

/**
 * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
 * minimum shallow depth that will be used.
 * @sbq: Bitmap queue in question.
 * @min_shallow_depth: The minimum shallow depth that will be passed to
 * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
 *
 * sbitmap_queue_clear() batches wakeups as an optimization. The batch size
 * depends on the depth of the bitmap. Since the shallow allocation functions
 * effectively operate with a different depth, the shallow depth must be taken
 * into account when calculating the batch size. This function must be called
 * with the minimum shallow depth that will be used. Failure to do so can result
 * in missed wakeups.
 */
void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
				     unsigned int min_shallow_depth);

/**
 * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
 * &struct sbitmap_queue.
 * @sbq: Bitmap to free from.
 * @nr: Bit number to free.
 * @cpu: CPU the bit was allocated on.
 */
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
			 unsigned int cpu);

/**
 * sbitmap_queue_clear_batch() - Free a batch of allocated bits from a
 * &struct sbitmap_queue.
 * @sbq: Bitmap to free from.
 * @offset: offset for each tag in array
 * @tags: array of tags
 * @nr_tags: number of tags in array
 */
void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
			       int *tags, int nr_tags);

static inline int sbq_index_inc(int index)
{
	return (index + 1) & (SBQ_WAIT_QUEUES - 1);
}

static inline void sbq_index_atomic_inc(atomic_t *index)
{
	int old = atomic_read(index);
	int new = sbq_index_inc(old);
	atomic_cmpxchg(index, old, new);
}

/**
 * sbq_wait_ptr() - Get the next wait queue to use for a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to wait on.
 * @wait_index: A counter per "user" of @sbq.
 */
static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
						  atomic_t *wait_index)
{
	struct sbq_wait_state *ws;

	ws = &sbq->ws[atomic_read(wait_index)];
	sbq_index_atomic_inc(wait_index);
	return ws;
}

/**
 * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to wake up.
 */
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);

/**
 * sbitmap_queue_wake_up() - Wake up some of the waiters in one waitqueue
 * on a &struct sbitmap_queue.
 * @sbq: Bitmap queue to wake up.
 * @nr: Number of bits cleared.
 */
void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr);

/**
 * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
 * seq_file.
 * @sbq: Bitmap queue to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The format may change at any time.
 */
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);

struct sbq_wait {
	struct sbitmap_queue *sbq;	/* if set, sbq_wait is accounted */
	struct wait_queue_entry wait;
};

#define DEFINE_SBQ_WAIT(name)							\
	struct sbq_wait name = {						\
		.sbq = NULL,							\
		.wait = {							\
			.private	= current,				\
			.func		= autoremove_wake_function,		\
			.entry		= LIST_HEAD_INIT((name).wait.entry),	\
		}								\
	}

/*
 * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
 * internal state.
 */
void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
			     struct sbq_wait_state *ws,
			     struct sbq_wait *sbq_wait, int state);

/*
 * Must be paired with sbitmap_prepare_to_wait().
 */
void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
			 struct sbq_wait *sbq_wait);

/*
 * Wrapper around add_wait_queue(), which maintains some extra internal state.
 */
void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
			    struct sbq_wait_state *ws,
			    struct sbq_wait *sbq_wait);

/*
 * Must be paired with sbitmap_add_wait_queue().
 */
void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait);

#endif /* __LINUX_SCALE_BITMAP_H */
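/*
 * A minimal usage sketch, not part of this header: allocate a small tag
 * space, take one bit, and release it again. The shift of -1 asks
 * sbitmap_init_node() to pick its default word size; NUMA_NO_NODE and the
 * function name are illustrative choices, not part of the API above.
 */
static inline int example_sbitmap_usage(void)
{
	struct sbitmap sb;
	int nr, ret;

	ret = sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, NUMA_NO_NODE,
				false, true);
	if (ret)
		return ret;

	nr = sbitmap_get(&sb);		/* returns -1 if no bit is free */
	if (nr >= 0)
		sbitmap_put(&sb, nr);	/* deferred clear + alloc hint update */

	sbitmap_free(&sb);
	return 0;
}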
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013
 * Phillip Lougher <phillip@squashfs.org.uk>
 */

#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mutex.h>

#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "page_actor.h"

/* Read separately compressed datablock directly into page cache */
int squashfs_readpage_block(struct folio *folio, u64 block, int bsize,
			    int expected)
{
	struct page *target_page = &folio->page;
	struct inode *inode = folio->mapping->host;
	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
	loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
	int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
	loff_t start_index = folio->index & ~mask;
	loff_t end_index = start_index | mask;
	loff_t index;
	int i, pages, bytes, res = -ENOMEM;
	struct page **page, *last_page;
	struct squashfs_page_actor *actor;
	void *pageaddr;

	if (end_index > file_end)
		end_index = file_end;

	pages = end_index - start_index + 1;

	page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
	if (page == NULL)
		return res;

	/* Try to grab all the pages covered by the Squashfs block */
	for (i = 0, index = start_index; index <= end_index; index++) {
		page[i] = (index == folio->index) ? target_page :
			grab_cache_page_nowait(folio->mapping, index);

		if (page[i] == NULL)
			continue;

		if (PageUptodate(page[i])) {
			unlock_page(page[i]);
			put_page(page[i]);
			continue;
		}

		i++;
	}

	pages = i;

	/*
	 * Create a "page actor" which will kmap and kunmap the
	 * page cache pages appropriately within the decompressor
	 */
	actor = squashfs_page_actor_init_special(msblk, page, pages, expected,
						 start_index << PAGE_SHIFT);
	if (actor == NULL)
		goto out;

	/* Decompress directly into the page cache buffers */
	res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);

	last_page = squashfs_page_actor_free(actor);

	if (res < 0)
		goto mark_errored;

	if (res != expected || IS_ERR(last_page)) {
		res = -EIO;
		goto mark_errored;
	}

	/* Last page (if present) may have trailing bytes not filled */
	bytes = res % PAGE_SIZE;
	if (end_index == file_end && last_page && bytes) {
		pageaddr = kmap_local_page(last_page);
		memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
		kunmap_local(pageaddr);
	}

	/* Mark pages as uptodate, unlock and release */
	for (i = 0; i < pages; i++) {
		flush_dcache_page(page[i]);
		SetPageUptodate(page[i]);
		unlock_page(page[i]);
		if (page[i] != target_page)
			put_page(page[i]);
	}

	kfree(page);

	return 0;

mark_errored:
	/*
	 * Decompression failed. The target_page is
	 * dealt with by the caller.
	 */
	for (i = 0; i < pages; i++) {
		if (page[i] == NULL || page[i] == target_page)
			continue;
		flush_dcache_page(page[i]);
		unlock_page(page[i]);
		put_page(page[i]);
	}

out:
	kfree(page);
	return res;
}
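/*
 * An illustrative helper, not in the original file, restating the index
 * arithmetic used above: a Squashfs block of size 1 << block_log covers a
 * naturally aligned run of page-cache pages, so the first page of the run
 * is found by masking the low bits off the target index. The name is
 * hypothetical.
 */
static inline pgoff_t example_squashfs_block_first_page(pgoff_t index,
							unsigned int block_log)
{
	pgoff_t mask = (1UL << (block_log - PAGE_SHIFT)) - 1;

	return index & ~mask;	/* start_index in squashfs_readpage_block() */
}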
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *   Copyright (C) International Business Machines Corp., 2000-2002
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 */
#ifndef _H_JFS_UNICODE
#define _H_JFS_UNICODE

#include <linux/slab.h>
#include <asm/byteorder.h>
#include "../nls/nls_ucs2_data.h"
#include "jfs_types.h"

extern int get_UCSname(struct component_name *, struct dentry *);
extern int jfs_strfromUCS_le(char *, const __le16 *, int, struct nls_table *);

#define free_UCSname(COMP) kfree((COMP)->name)

/*
 * UniStrcpy:  Copy a string
 */
static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *anchor = ucs1;	/* save the start of result string */

	while ((*ucs1++ = *ucs2++));
	return anchor;
}

/*
 * UniStrncpy:  Copy length limited string with pad
 */
static inline __le16 *UniStrncpy_le(__le16 *ucs1, const __le16 *ucs2,
				    size_t n)
{
	__le16 *anchor = ucs1;

	while (n-- && *ucs2)	/* Copy the strings */
		*ucs1++ = *ucs2++;

	n++;
	while (n--)		/* Pad with nulls */
		*ucs1++ = 0;
	return anchor;
}

/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 */
static inline int UniStrncmp_le(const wchar_t *ucs1, const __le16 *ucs2,
				size_t n)
{
	if (!n)
		return 0;	/* Null strings are equal */
	while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
		ucs1++;
		ucs2++;
	}
	return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
}

/*
 * UniStrncpy_to_le:  Copy length limited string with pad to little-endian
 */
static inline __le16 *UniStrncpy_to_le(__le16 *ucs1, const wchar_t *ucs2,
				       size_t n)
{
	__le16 *anchor = ucs1;

	while (n-- && *ucs2)	/* Copy the strings */
		*ucs1++ = cpu_to_le16(*ucs2++);

	n++;
	while (n--)		/* Pad with nulls */
		*ucs1++ = 0;
	return anchor;
}

/*
 * UniStrncpy_from_le:  Copy length limited string with pad from little-endian
 */
static inline wchar_t *UniStrncpy_from_le(wchar_t *ucs1, const __le16 *ucs2,
					  size_t n)
{
	wchar_t *anchor = ucs1;

	while (n-- && *ucs2)	/* Copy the strings */
		*ucs1++ = __le16_to_cpu(*ucs2++);

	n++;
	while (n--)		/* Pad with nulls */
		*ucs1++ = 0;
	return anchor;
}

/*
 * UniToupper:  Convert a unicode character to upper case
 */
static inline wchar_t UniToupper(wchar_t uc)
{
	const struct UniCaseRange *rp;

	if (uc < sizeof(NlsUniUpperTable)) {	/* Latin characters */
		return uc + NlsUniUpperTable[uc];	/* Use base tables */
	} else {
		rp = NlsUniUpperRange;		/* Use range tables */
		while (rp->start) {
			if (uc < rp->start)	/* Before start of range */
				return uc;	/* Uppercase = input */
			if (uc <= rp->end)	/* In range */
				return uc + rp->table[uc - rp->start];
			rp++;			/* Try next range */
		}
	}
	return uc;		/* Past last range */
}

/*
 * UniStrupr:  Upper case a unicode string
 */
static inline wchar_t *UniStrupr(wchar_t *upin)
{
	wchar_t *up;

	up = upin;
	while (*up) {		/* For all characters */
		*up = UniToupper(*up);
		up++;
	}
	return upin;		/* Return input pointer */
}

#endif	/* !_H_JFS_UNICODE */
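/*
 * A minimal sketch, not part of this header, of how the helpers above
 * compose: names are stored little-endian on disk, so converting a
 * CPU-endian name with UniStrncpy_to_le() and comparing it back with
 * UniStrncmp_le() should always match. The function name and buffer size
 * are illustrative only.
 */
static inline bool example_jfs_unicode_roundtrip(const wchar_t *name, size_t n)
{
	__le16 buf[16];

	if (n > ARRAY_SIZE(buf))
		return false;

	UniStrncpy_to_le(buf, name, n);	/* CPU order -> on-disk LE, null padded */
	return UniStrncmp_le(name, buf, n) == 0;
}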
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
*/ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_fs.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_sysfs.h" #include "xfs_inode.h" #ifdef DEBUG static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_DEFAULT, XFS_RANDOM_IFLUSH_1, XFS_RANDOM_IFLUSH_2, XFS_RANDOM_IFLUSH_3, XFS_RANDOM_IFLUSH_4, XFS_RANDOM_IFLUSH_5, XFS_RANDOM_IFLUSH_6, XFS_RANDOM_DA_READ_BUF, XFS_RANDOM_BTREE_CHECK_LBLOCK, XFS_RANDOM_BTREE_CHECK_SBLOCK, XFS_RANDOM_ALLOC_READ_AGF, XFS_RANDOM_IALLOC_READ_AGI, XFS_RANDOM_ITOBP_INOTOBP, XFS_RANDOM_IUNLINK, XFS_RANDOM_IUNLINK_REMOVE, XFS_RANDOM_DIR_INO_VALIDATE, XFS_RANDOM_BULKSTAT_READ_CHUNK, XFS_RANDOM_IODONE_IOERR, XFS_RANDOM_STRATREAD_IOERR, XFS_RANDOM_STRATCMPL_IOERR, XFS_RANDOM_DIOWRITE_IOERR, XFS_RANDOM_BMAPIFORMAT, XFS_RANDOM_FREE_EXTENT, XFS_RANDOM_RMAP_FINISH_ONE, XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE, XFS_RANDOM_REFCOUNT_FINISH_ONE, XFS_RANDOM_BMAP_FINISH_ONE, XFS_RANDOM_AG_RESV_CRITICAL, 0, /* XFS_RANDOM_DROP_WRITES has been removed */ XFS_RANDOM_LOG_BAD_CRC, XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, XFS_RANDOM_FORCE_SCRUB_REPAIR, XFS_RANDOM_FORCE_SUMMARY_RECALC, XFS_RANDOM_IUNLINK_FALLBACK, XFS_RANDOM_BUF_IOERROR, XFS_RANDOM_REDUCE_MAX_IEXTENTS, XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT, XFS_RANDOM_AG_RESV_FAIL, XFS_RANDOM_LARP, XFS_RANDOM_DA_LEAF_SPLIT, XFS_RANDOM_ATTR_LEAF_TO_NODE, XFS_RANDOM_WB_DELAY_MS, XFS_RANDOM_WRITE_DELAY_MS, XFS_RANDOM_EXCHMAPS_FINISH_ONE, XFS_RANDOM_METAFILE_RESV_CRITICAL, }; struct xfs_errortag_attr { struct attribute attr; unsigned int tag; }; static inline struct xfs_errortag_attr * to_attr(struct attribute *attr) { return container_of(attr, struct xfs_errortag_attr, attr); } static inline struct xfs_mount * to_mp(struct kobject *kobject) { struct xfs_kobj *kobj = to_kobj(kobject); return container_of(kobj, struct xfs_mount, m_errortag_kobj); } STATIC ssize_t xfs_errortag_attr_store( struct kobject *kobject, struct attribute *attr, const char *buf, size_t count) { struct xfs_mount *mp = to_mp(kobject); struct xfs_errortag_attr *xfs_attr = to_attr(attr); int ret; unsigned int val; if (strcmp(buf, "default") == 0) { val = xfs_errortag_random_default[xfs_attr->tag]; } else { ret = kstrtouint(buf, 0, &val); if (ret) return ret; } ret = xfs_errortag_set(mp, xfs_attr->tag, val); if (ret) return ret; return count; } STATIC ssize_t xfs_errortag_attr_show( struct kobject *kobject, struct attribute *attr, char *buf) { struct xfs_mount *mp = to_mp(kobject); struct xfs_errortag_attr *xfs_attr = to_attr(attr); return snprintf(buf, PAGE_SIZE, "%u\n", xfs_errortag_get(mp, xfs_attr->tag)); } static const struct sysfs_ops xfs_errortag_sysfs_ops = { .show = xfs_errortag_attr_show, .store = xfs_errortag_attr_store, }; #define XFS_ERRORTAG_ATTR_RW(_name, _tag) \ static struct xfs_errortag_attr xfs_errortag_attr_##_name = { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) }, \ .tag = (_tag), \ } #define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr XFS_ERRORTAG_ATTR_RW(noerror, XFS_ERRTAG_NOERROR); XFS_ERRORTAG_ATTR_RW(iflush1, XFS_ERRTAG_IFLUSH_1); XFS_ERRORTAG_ATTR_RW(iflush2, XFS_ERRTAG_IFLUSH_2); XFS_ERRORTAG_ATTR_RW(iflush3, XFS_ERRTAG_IFLUSH_3); XFS_ERRORTAG_ATTR_RW(iflush4, XFS_ERRTAG_IFLUSH_4); XFS_ERRORTAG_ATTR_RW(iflush5, XFS_ERRTAG_IFLUSH_5); XFS_ERRORTAG_ATTR_RW(iflush6, XFS_ERRTAG_IFLUSH_6); XFS_ERRORTAG_ATTR_RW(dareadbuf, XFS_ERRTAG_DA_READ_BUF); 
XFS_ERRORTAG_ATTR_RW(btree_chk_lblk, XFS_ERRTAG_BTREE_CHECK_LBLOCK); XFS_ERRORTAG_ATTR_RW(btree_chk_sblk, XFS_ERRTAG_BTREE_CHECK_SBLOCK); XFS_ERRORTAG_ATTR_RW(readagf, XFS_ERRTAG_ALLOC_READ_AGF); XFS_ERRORTAG_ATTR_RW(readagi, XFS_ERRTAG_IALLOC_READ_AGI); XFS_ERRORTAG_ATTR_RW(itobp, XFS_ERRTAG_ITOBP_INOTOBP); XFS_ERRORTAG_ATTR_RW(iunlink, XFS_ERRTAG_IUNLINK); XFS_ERRORTAG_ATTR_RW(iunlinkrm, XFS_ERRTAG_IUNLINK_REMOVE); XFS_ERRORTAG_ATTR_RW(dirinovalid, XFS_ERRTAG_DIR_INO_VALIDATE); XFS_ERRORTAG_ATTR_RW(bulkstat, XFS_ERRTAG_BULKSTAT_READ_CHUNK); XFS_ERRORTAG_ATTR_RW(logiodone, XFS_ERRTAG_IODONE_IOERR); XFS_ERRORTAG_ATTR_RW(stratread, XFS_ERRTAG_STRATREAD_IOERR); XFS_ERRORTAG_ATTR_RW(stratcmpl, XFS_ERRTAG_STRATCMPL_IOERR); XFS_ERRORTAG_ATTR_RW(diowrite, XFS_ERRTAG_DIOWRITE_IOERR); XFS_ERRORTAG_ATTR_RW(bmapifmt, XFS_ERRTAG_BMAPIFORMAT); XFS_ERRORTAG_ATTR_RW(free_extent, XFS_ERRTAG_FREE_EXTENT); XFS_ERRORTAG_ATTR_RW(rmap_finish_one, XFS_ERRTAG_RMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE); XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); XFS_ERRORTAG_ATTR_RW(force_repair, XFS_ERRTAG_FORCE_SCRUB_REPAIR); XFS_ERRORTAG_ATTR_RW(bad_summary, XFS_ERRTAG_FORCE_SUMMARY_RECALC); XFS_ERRORTAG_ATTR_RW(iunlink_fallback, XFS_ERRTAG_IUNLINK_FALLBACK); XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR); XFS_ERRORTAG_ATTR_RW(reduce_max_iextents, XFS_ERRTAG_REDUCE_MAX_IEXTENTS); XFS_ERRORTAG_ATTR_RW(bmap_alloc_minlen_extent, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT); XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL); XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP); XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT); XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE); XFS_ERRORTAG_ATTR_RW(wb_delay_ms, XFS_ERRTAG_WB_DELAY_MS); XFS_ERRORTAG_ATTR_RW(write_delay_ms, XFS_ERRTAG_WRITE_DELAY_MS); XFS_ERRORTAG_ATTR_RW(exchmaps_finish_one, XFS_ERRTAG_EXCHMAPS_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(metafile_resv_crit, XFS_ERRTAG_METAFILE_RESV_CRITICAL); static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(noerror), XFS_ERRORTAG_ATTR_LIST(iflush1), XFS_ERRORTAG_ATTR_LIST(iflush2), XFS_ERRORTAG_ATTR_LIST(iflush3), XFS_ERRORTAG_ATTR_LIST(iflush4), XFS_ERRORTAG_ATTR_LIST(iflush5), XFS_ERRORTAG_ATTR_LIST(iflush6), XFS_ERRORTAG_ATTR_LIST(dareadbuf), XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk), XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk), XFS_ERRORTAG_ATTR_LIST(readagf), XFS_ERRORTAG_ATTR_LIST(readagi), XFS_ERRORTAG_ATTR_LIST(itobp), XFS_ERRORTAG_ATTR_LIST(iunlink), XFS_ERRORTAG_ATTR_LIST(iunlinkrm), XFS_ERRORTAG_ATTR_LIST(dirinovalid), XFS_ERRORTAG_ATTR_LIST(bulkstat), XFS_ERRORTAG_ATTR_LIST(logiodone), XFS_ERRORTAG_ATTR_LIST(stratread), XFS_ERRORTAG_ATTR_LIST(stratcmpl), XFS_ERRORTAG_ATTR_LIST(diowrite), XFS_ERRORTAG_ATTR_LIST(bmapifmt), XFS_ERRORTAG_ATTR_LIST(free_extent), XFS_ERRORTAG_ATTR_LIST(rmap_finish_one), XFS_ERRORTAG_ATTR_LIST(refcount_continue_update), XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), XFS_ERRORTAG_ATTR_LIST(log_bad_crc), XFS_ERRORTAG_ATTR_LIST(log_item_pin), 
	XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
	XFS_ERRORTAG_ATTR_LIST(force_repair),
	XFS_ERRORTAG_ATTR_LIST(bad_summary),
	XFS_ERRORTAG_ATTR_LIST(iunlink_fallback),
	XFS_ERRORTAG_ATTR_LIST(buf_ioerror),
	XFS_ERRORTAG_ATTR_LIST(reduce_max_iextents),
	XFS_ERRORTAG_ATTR_LIST(bmap_alloc_minlen_extent),
	XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
	XFS_ERRORTAG_ATTR_LIST(larp),
	XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
	XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
	XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
	XFS_ERRORTAG_ATTR_LIST(write_delay_ms),
	XFS_ERRORTAG_ATTR_LIST(exchmaps_finish_one),
	XFS_ERRORTAG_ATTR_LIST(metafile_resv_crit),
	NULL,
};
ATTRIBUTE_GROUPS(xfs_errortag);

static const struct kobj_type xfs_errortag_ktype = {
	.release = xfs_sysfs_release,
	.sysfs_ops = &xfs_errortag_sysfs_ops,
	.default_groups = xfs_errortag_groups,
};

int
xfs_errortag_init(
	struct xfs_mount	*mp)
{
	int			ret;

	mp->m_errortag = kzalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!mp->m_errortag)
		return -ENOMEM;

	ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
				&mp->m_kobj, "errortag");
	if (ret)
		kfree(mp->m_errortag);
	return ret;
}

void
xfs_errortag_del(
	struct xfs_mount	*mp)
{
	xfs_sysfs_del(&mp->m_errortag_kobj);
	kfree(mp->m_errortag);
}

static bool
xfs_errortag_valid(
	unsigned int		error_tag)
{
	if (error_tag >= XFS_ERRTAG_MAX)
		return false;

	/* Error out removed injection types */
	if (error_tag == XFS_ERRTAG_DROP_WRITES)
		return false;
	return true;
}

bool
xfs_errortag_enabled(
	struct xfs_mount	*mp,
	unsigned int		tag)
{
	if (!mp->m_errortag)
		return false;
	if (!xfs_errortag_valid(tag))
		return false;

	return mp->m_errortag[tag] != 0;
}

bool
xfs_errortag_test(
	struct xfs_mount	*mp,
	const char		*expression,
	const char		*file,
	int			line,
	unsigned int		error_tag)
{
	unsigned int		randfactor;

	/*
	 * To be able to use error injection anywhere, we need to ensure the
	 * error injection mechanism is already initialized.
	 *
	 * Code paths like I/O completion can be called before the
	 * initialization is complete, but being able to inject errors in such
	 * places is still useful.
	 */
	if (!mp->m_errortag)
		return false;

	if (!xfs_errortag_valid(error_tag))
		return false;

	randfactor = mp->m_errortag[error_tag];
	if (!randfactor || get_random_u32_below(randfactor))
		return false;

	xfs_warn_ratelimited(mp,
"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
			expression, file, line, mp->m_super->s_id);
	return true;
}

int
xfs_errortag_get(
	struct xfs_mount	*mp,
	unsigned int		error_tag)
{
	if (!xfs_errortag_valid(error_tag))
		return -EINVAL;

	return mp->m_errortag[error_tag];
}

int
xfs_errortag_set(
	struct xfs_mount	*mp,
	unsigned int		error_tag,
	unsigned int		tag_value)
{
	if (!xfs_errortag_valid(error_tag))
		return -EINVAL;

	mp->m_errortag[error_tag] = tag_value;
	return 0;
}

int
xfs_errortag_add(
	struct xfs_mount	*mp,
	unsigned int		error_tag)
{
	BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX);

	if (!xfs_errortag_valid(error_tag))
		return -EINVAL;

	return xfs_errortag_set(mp, error_tag,
			xfs_errortag_random_default[error_tag]);
}

int
xfs_errortag_clearall(
	struct xfs_mount	*mp)
{
	memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
	return 0;
}
#endif /* DEBUG */

void
xfs_error_report(
	const char		*tag,
	int			level,
	struct xfs_mount	*mp,
	const char		*filename,
	int			linenum,
	xfs_failaddr_t		failaddr)
{
	if (level <= xfs_error_level) {
		xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
		"Internal error %s at line %d of file %s. 
Caller %pS", tag, linenum, filename, failaddr); xfs_stack_trace(); } } void xfs_corruption_error( const char *tag, int level, struct xfs_mount *mp, const void *buf, size_t bufsize, const char *filename, int linenum, xfs_failaddr_t failaddr) { if (buf && level <= xfs_error_level) xfs_hex_dump(buf, bufsize); xfs_error_report(tag, level, mp, filename, linenum, failaddr); xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } /* * Complain about the kinds of metadata corruption that we can't detect from a * verifier, such as incorrect inter-block relationship data. Does not set * bp->b_error. * * Call xfs_buf_mark_corrupt, not this function. */ void xfs_buf_corruption_error( struct xfs_buf *bp, xfs_failaddr_t fa) { struct xfs_mount *mp = bp->b_mount; xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, "Metadata corruption detected at %pS, %s block 0x%llx", fa, bp->b_ops->name, xfs_buf_daddr(bp)); xfs_alert(mp, "Unmount and run xfs_repair"); if (xfs_error_level >= XFS_ERRLEVEL_HIGH) xfs_stack_trace(); } /* * Warnings specifically for verifier errors. Differentiate CRC vs. invalid * values, and omit the stack trace unless the error level is tuned high. */ void xfs_buf_verifier_error( struct xfs_buf *bp, int error, const char *name, const void *buf, size_t bufsz, xfs_failaddr_t failaddr) { struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; int sz; fa = failaddr ? failaddr : __return_address; __xfs_buf_ioerror(bp, error, fa); xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, "Metadata %s detected at %pS, %s block 0x%llx %s", bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", fa, bp->b_ops->name, xfs_buf_daddr(bp), name); xfs_alert(mp, "Unmount and run xfs_repair"); if (xfs_error_level >= XFS_ERRLEVEL_LOW) { sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", sz); xfs_hex_dump(buf, sz); } if (xfs_error_level >= XFS_ERRLEVEL_HIGH) xfs_stack_trace(); } /* * Warnings specifically for verifier errors. Differentiate CRC vs. invalid * values, and omit the stack trace unless the error level is tuned high. */ void xfs_verifier_error( struct xfs_buf *bp, int error, xfs_failaddr_t failaddr) { return xfs_buf_verifier_error(bp, error, "", xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN, failaddr); } /* * Warnings for inode corruption problems. Don't bother with the stack * trace unless the error level is turned up high. */ void xfs_inode_verifier_error( struct xfs_inode *ip, int error, const char *name, const void *buf, size_t bufsz, xfs_failaddr_t failaddr) { struct xfs_mount *mp = ip->i_mount; xfs_failaddr_t fa; int sz; fa = failaddr ? failaddr : __return_address; xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s", error == -EFSBADCRC ? "CRC error" : "corruption", fa, ip->i_ino, name); xfs_alert(mp, "Unmount and run xfs_repair"); if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) { sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz); xfs_alert(mp, "First %d bytes of corrupted metadata buffer:", sz); xfs_hex_dump(buf, sz); } if (xfs_error_level >= XFS_ERRLEVEL_HIGH) xfs_stack_trace(); }
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
 *  Dependencies
 ***************************************/
#define ZSTD_DEPS_NEED_MALLOC
#include "error_private.h"
#include "zstd_internal.h"

/*-****************************************
 *  Version
 ******************************************/
unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }

const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }

/*-****************************************
 *  ZSTD Error Management
 ******************************************/
#undef ZSTD_isError   /* defined within zstd_internal.h */
/*! ZSTD_isError() :
 *  tells if a return value is an error code
 *  symbol is required for external callers */
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }

/*! ZSTD_getErrorName() :
 *  provides error code string from function result (useful for debugging) */
const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }

/*! ZSTD_getErrorCode() :
 *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }

/*! ZSTD_getErrorString() :
 *  provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
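/*
 * A minimal sketch, not part of this file, of the error-handling pattern
 * the helpers above support: zstd functions return a size_t that either
 * carries a valid length or encodes an error, so callers check first. The
 * function name is illustrative.
 */
static size_t example_zstd_check(size_t ret)
{
	if (ZSTD_isError(ret)) {
		/* e.g. report ZSTD_getErrorName(ret) and bail out */
		return 0;
	}
	return ret;	/* a valid size */
}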
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

/*
 * Implements Extendible Hashing as described in:
 *   "Extendible Hashing" by Fagin, et al in
 *     __ACM Trans. on Database Systems__, Sept 1979.
 *
 *
 * Here's the layout of dirents which is essentially the same as that of ext2
 * within a single block. The field de_name_len is the number of bytes
 * actually required for the name (no null terminator). The field de_rec_len
 * is the number of bytes allocated to the dirent. The offset of the next
 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
 * deleted, the preceding dirent inherits its allocated space, i.e.,
 * prev->de_rec_len += deleted->de_rec_len.
 * Since the next dirent is obtained
 * by adding de_rec_len to the current dirent, this essentially causes the
 * deleted dirent to get jumped over when iterating through all the dirents.
 *
 * When deleting the first dirent in a block, there is no previous dirent so
 * the field de_ino is set to zero to designate it as deleted. When allocating
 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
 * dirent is allocated. Otherwise it must go through all the 'used' dirents
 * searching for one in which the amount of total space minus the amount of
 * used space will provide enough space for the new dirent.
 *
 * There are two types of blocks in which dirents reside. In a stuffed dinode,
 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
 * beginning of the leaf block. The dirents reside in leaves when
 *
 * dip->i_diskflags & GFS2_DIF_EXHASH is true
 *
 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
 *
 * When the dirents are in leaves, the actual contents of the directory file are
 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
 * dirents are NOT in the directory file itself. There can be more than one
 * block pointer in the array that points to the same leaf. In fact, when a
 * directory is first converted from linear to exhash, all of the pointers
 * point to the same leaf.
 *
 * When a leaf is completely full, the size of the hash table can be
 * doubled unless it is already at the maximum size which is hard coded into
 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
 * but never before the maximum hash table size has been reached.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/buffer_head.h> #include <linux/sort.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/vmalloc.h> #include <linux/bio.h> #include "gfs2.h" #include "incore.h" #include "dir.h" #include "glock.h" #include "inode.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "trans.h" #include "bmap.h" #include "util.h" #define MAX_RA_BLOCKS 32 /* max read-ahead blocks */ #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) #define GFS2_HASH_INDEX_MASK 0xffffc000 #define GFS2_USE_HASH_FLAG 0x2000 struct qstr gfs2_qdot __read_mostly; struct qstr gfs2_qdotdot __read_mostly; typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) { struct buffer_head *bh; bh = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); *bhp = bh; return 0; } static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) { struct buffer_head *bh; int error; error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, 0, &bh); if (error) return error; if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { brelse(bh); return -EIO; } *bhp = bh; return 0; } static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, unsigned int offset, unsigned int size) { struct buffer_head *dibh; int error; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) return error; gfs2_trans_add_meta(ip->i_gl, dibh); memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_inode.i_size < offset + size) i_size_write(&ip->i_inode, offset + size); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); return size; } /** * gfs2_dir_write_data - Write directory information to the inode * @ip: The GFS2 inode * @buf: The buffer containing information to be written * @offset: The file offset to start writing at * @size: The amount of data to write * * Returns: The number of bytes correctly written or error code */ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, u64 offset, unsigned int size) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; u64 lblock, dblock; u32 extlen = 0; unsigned int o; int copied = 0; int error = 0; bool new = false; if (!size) return 0; if (gfs2_is_stuffed(ip) && offset + size <= gfs2_max_stuffed_size(ip)) return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, size); if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) return -EINVAL; if (gfs2_is_stuffed(ip)) { error = gfs2_unstuff_dinode(ip); if (error) return error; } lblock = offset; o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); while (copied < size) { unsigned int amount; struct buffer_head *bh; amount = size - copied; if (amount > sdp->sd_sb.sb_bsize - o) amount = sdp->sd_sb.sb_bsize - o; if (!extlen) { extlen = 1; error = gfs2_alloc_extent(&ip->i_inode, lblock, &dblock, &extlen, &new); if (error) goto fail; error = -EIO; if (gfs2_assert_withdraw(sdp, dblock)) goto fail; } if (amount == sdp->sd_jbsize || new) error = gfs2_dir_get_new_buffer(ip, dblock, &bh); else error = 
gfs2_dir_get_existing_buffer(ip, dblock, &bh);
		if (error)
			goto fail;

		gfs2_trans_add_meta(ip->i_gl, bh);
		memcpy(bh->b_data + o, buf, amount);
		brelse(bh);

		buf += amount;
		copied += amount;
		lblock++;
		dblock++;
		extlen--;

		o = sizeof(struct gfs2_meta_header);
	}

out:
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (ip->i_inode.i_size < offset + copied)
		i_size_write(&ip->i_inode, offset + copied);
	inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

	return copied;
fail:
	if (copied)
		goto out;
	return error;
}

static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
				 unsigned int size)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
		brelse(dibh);
	}

	return (error) ? error : size;
}

/**
 * gfs2_dir_read_data - Read data from a directory inode
 * @ip: The GFS2 Inode
 * @buf: The buffer to place result into
 * @size: Amount of data to transfer
 *
 * Returns: The amount of data actually copied or the error
 */
static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
			      unsigned int size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 lblock, dblock;
	u32 extlen = 0;
	unsigned int o;
	int copied = 0;
	int error = 0;

	if (gfs2_is_stuffed(ip))
		return gfs2_dir_read_stuffed(ip, buf, size);

	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
		return -EINVAL;

	lblock = 0;
	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);

	while (copied < size) {
		unsigned int amount;
		struct buffer_head *bh;

		amount = size - copied;
		if (amount > sdp->sd_sb.sb_bsize - o)
			amount = sdp->sd_sb.sb_bsize - o;

		if (!extlen) {
			extlen = 32;
			error = gfs2_get_extent(&ip->i_inode, lblock,
						&dblock, &extlen);
			if (error || !dblock)
				goto fail;
			BUG_ON(extlen < 1);
			bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
		} else {
			error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, 0, &bh);
			if (error)
				goto fail;
		}
		error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
		if (error) {
			brelse(bh);
			goto fail;
		}
		dblock++;
		extlen--;
		memcpy(buf, bh->b_data + o, amount);
		brelse(bh);
		buf += (amount/sizeof(__be64));
		copied += amount;
		lblock++;
		o = sizeof(struct gfs2_meta_header);
	}

	return copied;
fail:
	return (copied) ? copied : error;
}

/**
 * gfs2_dir_get_hash_table - Get pointer to the dir hash table
 * @ip: The inode in question
 *
 * Returns: The hash table or an error
 */
static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	int ret;
	u32 hsize;
	__be64 *hc;

	BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));

	hc = ip->i_hash_cache;
	if (hc)
		return hc;

	hsize = BIT(ip->i_depth);
	hsize *= sizeof(__be64);
	if (hsize != i_size_read(&ip->i_inode)) {
		gfs2_consist_inode(ip);
		return ERR_PTR(-EIO);
	}

	hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
	if (hc == NULL)
		hc = __vmalloc(hsize, GFP_NOFS);

	if (hc == NULL)
		return ERR_PTR(-ENOMEM);

	ret = gfs2_dir_read_data(ip, hc, hsize);
	if (ret < 0) {
		kvfree(hc);
		return ERR_PTR(ret);
	}

	spin_lock(&inode->i_lock);
	if (likely(!ip->i_hash_cache)) {
		ip->i_hash_cache = hc;
		hc = NULL;
	}
	spin_unlock(&inode->i_lock);
	kvfree(hc);

	return ip->i_hash_cache;
}

/**
 * gfs2_dir_hash_inval - Invalidate dir hash
 * @ip: The directory inode
 *
 * Must be called with an exclusive glock, or during glock invalidation.
*/ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc; spin_lock(&ip->i_inode.i_lock); hc = ip->i_hash_cache; ip->i_hash_cache = NULL; spin_unlock(&ip->i_inode.i_lock); kvfree(hc); } static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) { return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; } static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, const struct qstr *name, int ret) { if (!gfs2_dirent_sentinel(dent) && be32_to_cpu(dent->de_hash) == name->hash && be16_to_cpu(dent->de_name_len) == name->len && memcmp(dent+1, name->name, name->len) == 0) return ret; return 0; } static int gfs2_dirent_find(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) { return __gfs2_dirent_find(dent, name, 1); } static int gfs2_dirent_prev(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) { return __gfs2_dirent_find(dent, name, 2); } /* * name->name holds ptr to start of block. * name->len holds size of block. */ static int gfs2_dirent_last(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) { const char *start = name->name; const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len); if (name->len == (end - start)) return 1; return 0; } /* Look for the dirent that contains the offset specified in data. Once we * find that dirent, there must be space available there for the new dirent */ static int gfs2_dirent_find_offset(const struct gfs2_dirent *dent, const struct qstr *name, void *ptr) { unsigned required = GFS2_DIRENT_SIZE(name->len); unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); unsigned totlen = be16_to_cpu(dent->de_rec_len); if (ptr < (void *)dent || ptr >= (void *)dent + totlen) return 0; if (gfs2_dirent_sentinel(dent)) actual = 0; if (ptr < (void *)dent + actual) return -1; if ((void *)dent + totlen >= ptr + required) return 1; return -1; } static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) { unsigned required = GFS2_DIRENT_SIZE(name->len); unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); unsigned totlen = be16_to_cpu(dent->de_rec_len); if (gfs2_dirent_sentinel(dent)) actual = 0; if (totlen - actual >= required) return 1; return 0; } struct dirent_gather { const struct gfs2_dirent **pdent; unsigned offset; }; static int gfs2_dirent_gather(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque) { struct dirent_gather *g = opaque; if (!gfs2_dirent_sentinel(dent)) { g->pdent[g->offset++] = dent; } return 0; } /* * Other possible things to check: * - Inode located within filesystem size (and on valid block) * - Valid directory entry type * Not sure how heavy-weight we want to make this... could also check * hash is correct for example, but that would take a lot of extra time. * For now the most important thing is to check that the various sizes * are correct. 
*/ static int gfs2_check_dirent(struct gfs2_sbd *sdp, struct gfs2_dirent *dent, unsigned int offset, unsigned int size, unsigned int len, int first) { const char *msg = "gfs2_dirent too small"; if (unlikely(size < sizeof(struct gfs2_dirent))) goto error; msg = "gfs2_dirent misaligned"; if (unlikely(offset & 0x7)) goto error; msg = "gfs2_dirent points beyond end of block"; if (unlikely(offset + size > len)) goto error; msg = "zero inode number"; if (unlikely(!first && gfs2_dirent_sentinel(dent))) goto error; msg = "name length is greater than space in dirent"; if (!gfs2_dirent_sentinel(dent) && unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > size)) goto error; return 0; error: fs_warn(sdp, "%s: %s (%s)\n", __func__, msg, first ? "first in block" : "not first in block"); return -EIO; } static int gfs2_dirent_offset(struct gfs2_sbd *sdp, const void *buf) { const struct gfs2_meta_header *h = buf; int offset; BUG_ON(buf == NULL); switch(be32_to_cpu(h->mh_type)) { case GFS2_METATYPE_LF: offset = sizeof(struct gfs2_leaf); break; case GFS2_METATYPE_DI: offset = sizeof(struct gfs2_dinode); break; default: goto wrong_type; } return offset; wrong_type: fs_warn(sdp, "%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); return -1; } static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, unsigned int len, gfs2_dscan_t scan, const struct qstr *name, void *opaque) { struct gfs2_dirent *dent, *prev; unsigned offset; unsigned size; int ret = 0; ret = gfs2_dirent_offset(GFS2_SB(inode), buf); if (ret < 0) { gfs2_consist_inode(GFS2_I(inode)); return ERR_PTR(-EIO); } offset = ret; prev = NULL; dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 1)) { gfs2_consist_inode(GFS2_I(inode)); return ERR_PTR(-EIO); } do { ret = scan(dent, name, opaque); if (ret) break; offset += size; if (offset == len) break; prev = dent; dent = buf + offset; size = be16_to_cpu(dent->de_rec_len); if (gfs2_check_dirent(GFS2_SB(inode), dent, offset, size, len, 0)) { gfs2_consist_inode(GFS2_I(inode)); return ERR_PTR(-EIO); } } while(1); switch(ret) { case 0: return NULL; case 1: return dent; case 2: return prev ? 
prev : dent; default: BUG_ON(ret > 0); return ERR_PTR(ret); } } static int dirent_check_reclen(struct gfs2_inode *dip, const struct gfs2_dirent *d, const void *end_p) { const void *ptr = d; u16 rec_len = be16_to_cpu(d->de_rec_len); if (unlikely(rec_len < sizeof(struct gfs2_dirent))) { gfs2_consist_inode(dip); return -EIO; } ptr += rec_len; if (ptr < end_p) return rec_len; if (ptr == end_p) return -ENOENT; gfs2_consist_inode(dip); return -EIO; } /** * dirent_next - Next dirent * @dip: the directory * @bh: The buffer * @dent: Pointer to list of dirents * * Returns: 0 on success, error code otherwise */ static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, struct gfs2_dirent **dent) { struct gfs2_dirent *cur = *dent, *tmp; char *bh_end = bh->b_data + bh->b_size; int ret; ret = dirent_check_reclen(dip, cur, bh_end); if (ret < 0) return ret; tmp = (void *)cur + ret; ret = dirent_check_reclen(dip, tmp, bh_end); if (ret == -EIO) return ret; /* Only the first dent could ever have de_inum.no_addr == 0 */ if (gfs2_dirent_sentinel(tmp)) { gfs2_consist_inode(dip); return -EIO; } *dent = tmp; return 0; } /** * dirent_del - Delete a dirent * @dip: The GFS2 inode * @bh: The buffer * @prev: The previous dirent * @cur: The current dirent * */ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, struct gfs2_dirent *prev, struct gfs2_dirent *cur) { u16 cur_rec_len, prev_rec_len; if (gfs2_dirent_sentinel(cur)) { gfs2_consist_inode(dip); return; } gfs2_trans_add_meta(dip->i_gl, bh); /* If there is no prev entry, this is the first entry in the block. The de_rec_len is already as big as it needs to be. Just zero out the inode number and return. */ if (!prev) { cur->de_inum.no_addr = 0; cur->de_inum.no_formal_ino = 0; return; } /* Combine this dentry with the previous one. */ prev_rec_len = be16_to_cpu(prev->de_rec_len); cur_rec_len = be16_to_cpu(cur->de_rec_len); if ((char *)prev + prev_rec_len != (char *)cur) gfs2_consist_inode(dip); if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) gfs2_consist_inode(dip); prev_rec_len += cur_rec_len; prev->de_rec_len = cpu_to_be16(prev_rec_len); } static struct gfs2_dirent *do_init_dirent(struct inode *inode, struct gfs2_dirent *dent, const struct qstr *name, struct buffer_head *bh, unsigned offset) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_dirent *ndent; unsigned totlen; totlen = be16_to_cpu(dent->de_rec_len); BUG_ON(offset + name->len > totlen); gfs2_trans_add_meta(ip->i_gl, bh); ndent = (struct gfs2_dirent *)((char *)dent + offset); dent->de_rec_len = cpu_to_be16(offset); gfs2_qstr2dirent(name, totlen - offset, ndent); return ndent; } /* * Takes a dent from which to grab space as an argument. Returns the * newly created dent. 
*/ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, struct gfs2_dirent *dent, const struct qstr *name, struct buffer_head *bh) { unsigned offset = 0; if (!gfs2_dirent_sentinel(dent)) offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); return do_init_dirent(inode, dent, name, bh, offset); } static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode, struct buffer_head *bh, const struct qstr *name, void *ptr) { struct gfs2_dirent *dent; dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_find_offset, name, ptr); if (IS_ERR_OR_NULL(dent)) return dent; return do_init_dirent(inode, dent, name, bh, (unsigned)(ptr - (void *)dent)); } static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, struct buffer_head **bhp) { int error; error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, 0, bhp); if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { /* pr_info("block num=%llu\n", leaf_no); */ error = -EIO; } return error; } /** * get_leaf_nr - Get a leaf number associated with the index * @dip: The GFS2 inode * @index: hash table index of the targeted leaf * @leaf_out: Resulting leaf block number * * Returns: 0 on success, error code otherwise */ static int get_leaf_nr(struct gfs2_inode *dip, u32 index, u64 *leaf_out) { __be64 *hash; int error; hash = gfs2_dir_get_hash_table(dip); error = PTR_ERR_OR_ZERO(hash); if (!error) *leaf_out = be64_to_cpu(*(hash + index)); return error; } static int get_first_leaf(struct gfs2_inode *dip, u32 index, struct buffer_head **bh_out) { u64 leaf_no; int error; error = get_leaf_nr(dip, index, &leaf_no); if (!error) error = get_leaf(dip, leaf_no, bh_out); return error; } static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, const struct qstr *name, gfs2_dscan_t scan, struct buffer_head **pbh) { struct buffer_head *bh; struct gfs2_dirent *dent; struct gfs2_inode *ip = GFS2_I(inode); int error; if (ip->i_diskflags & GFS2_DIF_EXHASH) { struct gfs2_leaf *leaf; unsigned int hsize = BIT(ip->i_depth); unsigned int index; u64 ln; if (hsize * sizeof(u64) != i_size_read(inode)) { gfs2_consist_inode(ip); return ERR_PTR(-EIO); } index = name->hash >> (32 - ip->i_depth); error = get_first_leaf(ip, index, &bh); if (error) return ERR_PTR(error); do { dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); if (dent) goto got_dent; leaf = (struct gfs2_leaf *)bh->b_data; ln = be64_to_cpu(leaf->lf_next); brelse(bh); if (!ln) break; error = get_leaf(ip, ln, &bh); } while(!error); return error ? 
ERR_PTR(error) : NULL; } error = gfs2_meta_inode_buffer(ip, &bh); if (error) return ERR_PTR(error); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); got_dent: if (IS_ERR_OR_NULL(dent)) { brelse(bh); bh = NULL; } *pbh = bh; return dent; } static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) { struct gfs2_inode *ip = GFS2_I(inode); unsigned int n = 1; u64 bn; int error; struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct timespec64 tv = current_time(inode); error = gfs2_alloc_blocks(ip, &bn, &n, 0); if (error) return NULL; bh = gfs2_meta_new(ip->i_gl, bn); if (!bh) return NULL; gfs2_trans_remove_revoke(GFS2_SB(inode), bn, 1); gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); leaf = (struct gfs2_leaf *)bh->b_data; leaf->lf_depth = cpu_to_be16(depth); leaf->lf_entries = 0; leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); leaf->lf_next = 0; leaf->lf_inode = cpu_to_be64(ip->i_no_addr); leaf->lf_dist = cpu_to_be32(1); leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); leaf->lf_sec = cpu_to_be64(tv.tv_sec); memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); dent = (struct gfs2_dirent *)(leaf+1); gfs2_qstr2dirent(&empty_name, bh->b_size - sizeof(struct gfs2_leaf), dent); *pbh = bh; return leaf; } /** * dir_make_exhash - Convert a stuffed directory into an ExHash directory * @inode: The directory inode to be converted to exhash * * Returns: 0 on success, error code otherwise */ static int dir_make_exhash(struct inode *inode) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_dirent *dent; struct qstr args; struct buffer_head *bh, *dibh; struct gfs2_leaf *leaf; int y; u32 x; __be64 *lp; u64 bn; int error; error = gfs2_meta_inode_buffer(dip, &dibh); if (error) return error; /* Turn over a new leaf */ leaf = new_leaf(inode, &bh, 0); if (!leaf) return -ENOSPC; bn = bh->b_blocknr; gfs2_assert(sdp, dip->i_entries < BIT(16)); leaf->lf_entries = cpu_to_be16(dip->i_entries); /* Copy dirents */ gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, sizeof(struct gfs2_dinode)); /* Find last entry */ x = 0; args.len = bh->b_size - sizeof(struct gfs2_dinode) + sizeof(struct gfs2_leaf); args.name = bh->b_data; dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size, gfs2_dirent_last, &args, NULL); if (!dent) { brelse(bh); brelse(dibh); return -EIO; } if (IS_ERR(dent)) { brelse(bh); brelse(dibh); return PTR_ERR(dent); } /* Adjust the last dirent's record length (Remember that dent still points to the last entry.) */ dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) + sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf)); brelse(bh); /* We're done with the new leaf block, now setup the new hash table. 
*/ gfs2_trans_add_meta(dip->i_gl, dibh); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); for (x = sdp->sd_hash_ptrs; x--; lp++) *lp = cpu_to_be64(bn); i_size_write(inode, sdp->sd_sb.sb_bsize / 2); gfs2_add_inode_blocks(&dip->i_inode, 1); dip->i_diskflags |= GFS2_DIF_EXHASH; for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; dip->i_depth = y; gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); return 0; } /** * dir_split_leaf - Split a leaf block into two * @inode: The directory inode to be split * @name: name of the dirent we're trying to insert * * Returns: 0 on success, error code on failure */ static int dir_split_leaf(struct inode *inode, const struct qstr *name) { struct gfs2_inode *dip = GFS2_I(inode); struct buffer_head *nbh, *obh, *dibh; struct gfs2_leaf *nleaf, *oleaf; struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new; u32 start, len, half_len, divider; u64 bn, leaf_no; __be64 *lp; u32 index; int x; int error; index = name->hash >> (32 - dip->i_depth); error = get_leaf_nr(dip, index, &leaf_no); if (error) return error; /* Get the old leaf block */ error = get_leaf(dip, leaf_no, &obh); if (error) return error; oleaf = (struct gfs2_leaf *)obh->b_data; if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) { brelse(obh); return 1; /* can't split */ } gfs2_trans_add_meta(dip->i_gl, obh); nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); if (!nleaf) { brelse(obh); return -ENOSPC; } bn = nbh->b_blocknr; /* Compute the start and len of leaf pointers in the hash table. */ len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth)); half_len = len >> 1; if (!half_len) { fs_warn(GFS2_SB(inode), "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); gfs2_consist_inode(dip); error = -EIO; goto fail_brelse; } start = (index & ~(len - 1)); /* Change the pointers. Don't bother distinguishing stuffed from non-stuffed. This code is complicated enough already. 
*/ lp = kmalloc_array(half_len, sizeof(__be64), GFP_NOFS); if (!lp) { error = -ENOMEM; goto fail_brelse; } /* Change the pointers */ for (x = 0; x < half_len; x++) lp[x] = cpu_to_be64(bn); gfs2_dir_hash_inval(dip); error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), half_len * sizeof(u64)); if (error != half_len * sizeof(u64)) { if (error >= 0) error = -EIO; goto fail_lpfree; } kfree(lp); /* Compute the divider */ divider = (start + half_len) << (32 - dip->i_depth); /* Copy the entries */ dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); do { next = dent; if (dirent_next(dip, obh, &next)) next = NULL; if (!gfs2_dirent_sentinel(dent) && be32_to_cpu(dent->de_hash) < divider) { struct qstr str; void *ptr = ((char *)dent - obh->b_data) + nbh->b_data; str.name = (char*)(dent+1); str.len = be16_to_cpu(dent->de_name_len); str.hash = be32_to_cpu(dent->de_hash); new = gfs2_dirent_split_alloc(inode, nbh, &str, ptr); if (IS_ERR(new)) { error = PTR_ERR(new); break; } new->de_inum = dent->de_inum; /* No endian worries */ new->de_type = dent->de_type; /* No endian worries */ be16_add_cpu(&nleaf->lf_entries, 1); dirent_del(dip, obh, prev, dent); if (!oleaf->lf_entries) gfs2_consist_inode(dip); be16_add_cpu(&oleaf->lf_entries, -1); if (!prev) prev = dent; } else { prev = dent; } dent = next; } while (dent); oleaf->lf_depth = nleaf->lf_depth; error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { gfs2_trans_add_meta(dip->i_gl, dibh); gfs2_add_inode_blocks(&dip->i_inode, 1); gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); } brelse(obh); brelse(nbh); return error; fail_lpfree: kfree(lp); fail_brelse: brelse(obh); brelse(nbh); return error; } /** * dir_double_exhash - Double size of ExHash table * @dip: The GFS2 dinode * * Returns: 0 on success, error code on failure */ static int dir_double_exhash(struct gfs2_inode *dip) { struct buffer_head *dibh; u32 hsize; u32 hsize_bytes; __be64 *hc; __be64 *hc2, *h; int x; int error = 0; hsize = BIT(dip->i_depth); hsize_bytes = hsize * sizeof(__be64); hc = gfs2_dir_get_hash_table(dip); if (IS_ERR(hc)) return PTR_ERR(hc); hc2 = kmalloc_array(hsize_bytes, 2, GFP_NOFS | __GFP_NOWARN); if (hc2 == NULL) hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS); if (!hc2) return -ENOMEM; h = hc2; error = gfs2_meta_inode_buffer(dip, &dibh); if (error) goto out_kfree; for (x = 0; x < hsize; x++) { *h++ = *hc; *h++ = *hc; hc++; } error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); if (error != (hsize_bytes * 2)) goto fail; gfs2_dir_hash_inval(dip); dip->i_hash_cache = hc2; dip->i_depth++; gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); return 0; fail: /* Replace original hash table & size */ gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); i_size_write(&dip->i_inode, hsize_bytes); gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_kfree: kvfree(hc2); return error; } /** * compare_dents - compare directory entries by hash value * @a: first dent * @b: second dent * * When comparing the hash entries of @a to @b: * gt: returns 1 * lt: returns -1 * eq: returns 0 */ static int compare_dents(const void *a, const void *b) { const struct gfs2_dirent *dent_a, *dent_b; u32 hash_a, hash_b; int ret = 0; dent_a = *(const struct gfs2_dirent **)a; hash_a = dent_a->de_cookie; dent_b = *(const struct gfs2_dirent **)b; hash_b = dent_b->de_cookie; if (hash_a > hash_b) ret = 1; else if (hash_a < hash_b) ret = -1; else { unsigned int len_a = be16_to_cpu(dent_a->de_name_len); unsigned int len_b = 
be16_to_cpu(dent_b->de_name_len); if (len_a > len_b) ret = 1; else if (len_a < len_b) ret = -1; else ret = memcmp(dent_a + 1, dent_b + 1, len_a); } return ret; } /** * do_filldir_main - read out directory entries * @dip: The GFS2 inode * @ctx: what to feed the entries to * @darr: an array of struct gfs2_dirent pointers to read * @entries: the number of entries in darr * @sort_start: index of the directory array to start our sort * @copied: pointer to int that's non-zero if a entry has been copied out * * Jump through some hoops to make sure that if there are hash collsions, * they are read out at the beginning of a buffer. We want to minimize * the possibility that they will fall into different readdir buffers or * that someone will want to seek to that location. * * Returns: errno, >0 if the actor tells you to stop */ static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, struct gfs2_dirent **darr, u32 entries, u32 sort_start, int *copied) { const struct gfs2_dirent *dent, *dent_next; u64 off, off_next; unsigned int x, y; int run = 0; if (sort_start < entries) sort(&darr[sort_start], entries - sort_start, sizeof(struct gfs2_dirent *), compare_dents, NULL); dent_next = darr[0]; off_next = dent_next->de_cookie; for (x = 0, y = 1; x < entries; x++, y++) { dent = dent_next; off = off_next; if (y < entries) { dent_next = darr[y]; off_next = dent_next->de_cookie; if (off < ctx->pos) continue; ctx->pos = off; if (off_next == off) { if (*copied && !run) return 1; run = 1; } else run = 0; } else { if (off < ctx->pos) continue; ctx->pos = off; } if (!dir_emit(ctx, (const char *)(dent + 1), be16_to_cpu(dent->de_name_len), be64_to_cpu(dent->de_inum.no_addr), be16_to_cpu(dent->de_type))) return 1; *copied = 1; } /* Increment the ctx->pos by one, so the next time we come into the do_filldir fxn, we get the next entry instead of the last one in the current leaf */ ctx->pos++; return 0; } static void *gfs2_alloc_sort_buffer(unsigned size) { void *ptr = NULL; if (size < KMALLOC_MAX_SIZE) ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); if (!ptr) ptr = __vmalloc(size, GFP_NOFS); return ptr; } static int gfs2_set_cookies(struct gfs2_sbd *sdp, struct buffer_head *bh, unsigned leaf_nr, struct gfs2_dirent **darr, unsigned entries) { int sort_id = -1; int i; for (i = 0; i < entries; i++) { unsigned offset; darr[i]->de_cookie = be32_to_cpu(darr[i]->de_hash); darr[i]->de_cookie = gfs2_disk_hash2offset(darr[i]->de_cookie); if (!sdp->sd_args.ar_loccookie) continue; offset = (char *)(darr[i]) - (bh->b_data + gfs2_dirent_offset(sdp, bh->b_data)); offset /= GFS2_MIN_DIRENT_SIZE; offset += leaf_nr * sdp->sd_max_dents_per_leaf; if (offset >= GFS2_USE_HASH_FLAG || leaf_nr >= GFS2_USE_HASH_FLAG) { darr[i]->de_cookie |= GFS2_USE_HASH_FLAG; if (sort_id < 0) sort_id = i; continue; } darr[i]->de_cookie &= GFS2_HASH_INDEX_MASK; darr[i]->de_cookie |= offset; } return sort_id; } static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, int *copied, unsigned *depth, u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; struct gfs2_leaf *lf; unsigned entries = 0, entries2 = 0; unsigned leaves = 0, leaf = 0, offset, sort_offset; struct gfs2_dirent **darr, *dent; struct dirent_gather g; struct buffer_head **larr; int error, i, need_sort = 0, sort_id; u64 lfn = leaf_no; do { error = get_leaf(ip, lfn, &bh); if (error) goto out; lf = (struct gfs2_leaf *)bh->b_data; if (leaves == 0) *depth = be16_to_cpu(lf->lf_depth); entries += 
be16_to_cpu(lf->lf_entries); leaves++; lfn = be64_to_cpu(lf->lf_next); brelse(bh); } while(lfn); if (*depth < GFS2_DIR_MAX_DEPTH || !sdp->sd_args.ar_loccookie) { need_sort = 1; sort_offset = 0; } if (!entries) return 0; error = -ENOMEM; /* * The extra 99 entries are not normally used, but are a buffer * zone in case the number of entries in the leaf is corrupt. * 99 is the maximum number of entries that can fit in a single * leaf block. */ larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out; darr = (struct gfs2_dirent **)(larr + leaves); g.pdent = (const struct gfs2_dirent **)darr; g.offset = 0; lfn = leaf_no; do { error = get_leaf(ip, lfn, &bh); if (error) goto out_free; lf = (struct gfs2_leaf *)bh->b_data; lfn = be64_to_cpu(lf->lf_next); if (lf->lf_entries) { offset = g.offset; entries2 += be16_to_cpu(lf->lf_entries); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_gather, NULL, &g); error = PTR_ERR(dent); if (IS_ERR(dent)) goto out_free; if (entries2 != g.offset) { fs_warn(sdp, "Number of entries corrupt in dir " "leaf %llu, entries2 (%u) != " "g.offset (%u)\n", (unsigned long long)bh->b_blocknr, entries2, g.offset); gfs2_consist_inode(ip); error = -EIO; goto out_free; } error = 0; sort_id = gfs2_set_cookies(sdp, bh, leaf, &darr[offset], be16_to_cpu(lf->lf_entries)); if (!need_sort && sort_id >= 0) { need_sort = 1; sort_offset = offset + sort_id; } larr[leaf++] = bh; } else { larr[leaf++] = NULL; brelse(bh); } } while(lfn); BUG_ON(entries2 != entries); error = do_filldir_main(ip, ctx, darr, entries, need_sort ? sort_offset : entries, copied); out_free: for(i = 0; i < leaf; i++) brelse(larr[i]); kvfree(larr); out: return error; } /** * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. * @inode: the directory inode * @hsize: hash table size * @index: index into the hash table * @f_ra: read-ahead parameters * * Note: we can't calculate each index like dir_e_read can because we don't * have the leaf, and therefore we don't have the depth, and therefore we * don't have the length. So we have to just read enough ahead to make up * for the loss of information. */ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, struct file_ra_state *f_ra) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; struct buffer_head *bh; u64 blocknr = 0, last; unsigned count; /* First check if we've already read-ahead for the whole range. 
*/ if (index + MAX_RA_BLOCKS < f_ra->start) return; f_ra->start = max((pgoff_t)index, f_ra->start); for (count = 0; count < MAX_RA_BLOCKS; count++) { if (f_ra->start >= hsize) /* if exceeded the hash table */ break; last = blocknr; blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]); f_ra->start++; if (blocknr == last) continue; bh = gfs2_getbuf(gl, blocknr, 1); if (trylock_buffer(bh)) { if (buffer_uptodate(bh)) { unlock_buffer(bh); brelse(bh); continue; } bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | REQ_PRIO, bh); continue; } brelse(bh); } } /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @inode: the directory inode * @ctx: actor to feed the entries to * @f_ra: read-ahead parameters * * Returns: errno */ static int dir_e_read(struct inode *inode, struct dir_context *ctx, struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); u32 hsize, len = 0; u32 hash, index; __be64 *lp; int copied = 0; int error = 0; unsigned depth = 0; hsize = BIT(dip->i_depth); hash = gfs2_dir_offset2hash(ctx->pos); index = hash >> (32 - dip->i_depth); if (dip->i_hash_cache == NULL) f_ra->start = 0; lp = gfs2_dir_get_hash_table(dip); if (IS_ERR(lp)) return PTR_ERR(lp); gfs2_dir_readahead(inode, hsize, index, f_ra); while (index < hsize) { error = gfs2_dir_read_leaf(inode, ctx, &copied, &depth, be64_to_cpu(lp[index])); if (error) break; len = BIT(dip->i_depth - depth); index = (index & ~(len - 1)) + len; } if (error > 0) error = 0; return error; } int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct dirent_gather g; struct gfs2_dirent **darr, *dent; struct buffer_head *dibh; int copied = 0; int error; if (!dip->i_entries) return 0; if (dip->i_diskflags & GFS2_DIF_EXHASH) return dir_e_read(inode, ctx, f_ra); if (!gfs2_is_stuffed(dip)) { gfs2_consist_inode(dip); return -EIO; } error = gfs2_meta_inode_buffer(dip, &dibh); if (error) return error; error = -ENOMEM; /* 96 is max number of dirents which can be stuffed into an inode */ darr = kmalloc_array(96, sizeof(struct gfs2_dirent *), GFP_NOFS); if (darr) { g.pdent = (const struct gfs2_dirent **)darr; g.offset = 0; dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size, gfs2_dirent_gather, NULL, &g); if (IS_ERR(dent)) { error = PTR_ERR(dent); goto out; } if (dip->i_entries != g.offset) { fs_warn(sdp, "Number of entries corrupt in dir %llu, " "ip->i_entries (%u) != g.offset (%u)\n", (unsigned long long)dip->i_no_addr, dip->i_entries, g.offset); gfs2_consist_inode(dip); error = -EIO; goto out; } gfs2_set_cookies(sdp, dibh, 0, darr, dip->i_entries); error = do_filldir_main(dip, ctx, darr, dip->i_entries, 0, &copied); out: kfree(darr); } if (error > 0) error = 0; brelse(dibh); return error; } /** * gfs2_dir_search - Search a directory * @dir: The GFS2 directory inode * @name: The name we are looking up * @fail_on_exist: Fail if the name exists rather than looking it up * * This routine searches a directory for a file or another directory. * Assumes a glock is held on dip. 
* * Returns: errno */ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, bool fail_on_exist) { struct buffer_head *bh; struct gfs2_dirent *dent; u64 addr, formal_ino; u16 dtype; dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { struct inode *inode; u16 rahead; if (IS_ERR(dent)) return ERR_CAST(dent); dtype = be16_to_cpu(dent->de_type); rahead = be16_to_cpu(dent->de_rahead); addr = be64_to_cpu(dent->de_inum.no_addr); formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); brelse(bh); if (fail_on_exist) return ERR_PTR(-EEXIST); inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, GFS2_BLKST_FREE /* ignore */); if (!IS_ERR(inode)) GFS2_I(inode)->i_rahead = rahead; return inode; } return ERR_PTR(-ENOENT); } int gfs2_dir_check(struct inode *dir, const struct qstr *name, const struct gfs2_inode *ip) { struct buffer_head *bh; struct gfs2_dirent *dent; int ret = -ENOENT; dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); if (ip) { if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr) goto out; if (be64_to_cpu(dent->de_inum.no_formal_ino) != ip->i_no_formal_ino) goto out; if (unlikely(IF2DT(ip->i_inode.i_mode) != be16_to_cpu(dent->de_type))) { gfs2_consist_inode(GFS2_I(dir)); ret = -EIO; goto out; } } ret = 0; out: brelse(bh); } return ret; } /** * dir_new_leaf - Add a new leaf onto hash chain * @inode: The directory * @name: The name we are adding * * This adds a new dir leaf onto an existing leaf when there is not * enough space to add a new dir entry. This is a last resort after * we've expanded the hash table to max size and also split existing * leaf blocks, so it will only occur for very large directories. * * The dist parameter is set to 1 for leaf blocks directly attached * to the hash table, 2 for one layer of indirection, 3 for two layers * etc. We are thus able to tell the difference between an old leaf * with dist set to zero (i.e. "don't know") and a new one where we * set this information for debug/fsck purposes. 
* * Returns: 0 on success, or -ve on error */ static int dir_new_leaf(struct inode *inode, const struct qstr *name) { struct buffer_head *bh, *obh; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_leaf *leaf, *oleaf; u32 dist = 1; int error; u32 index; u64 bn; index = name->hash >> (32 - ip->i_depth); error = get_first_leaf(ip, index, &obh); if (error) return error; do { dist++; oleaf = (struct gfs2_leaf *)obh->b_data; bn = be64_to_cpu(oleaf->lf_next); if (!bn) break; brelse(obh); error = get_leaf(ip, bn, &obh); if (error) return error; } while(1); gfs2_trans_add_meta(ip->i_gl, obh); leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); if (!leaf) { brelse(obh); return -ENOSPC; } leaf->lf_dist = cpu_to_be32(dist); oleaf->lf_next = cpu_to_be64(bh->b_blocknr); brelse(bh); brelse(obh); error = gfs2_meta_inode_buffer(ip, &bh); if (error) return error; gfs2_trans_add_meta(ip->i_gl, bh); gfs2_add_inode_blocks(&ip->i_inode, 1); gfs2_dinode_out(ip, bh->b_data); brelse(bh); return 0; } static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) { u64 where = ip->i_no_addr + 1; if (ip->i_eattr == where) return 1; return 0; } /** * gfs2_dir_add - Add new filename into directory * @inode: The directory inode * @name: The new name * @nip: The GFS2 inode to be linked in to the directory * @da: The directory addition info * * If the call to gfs2_diradd_alloc_required resulted in there being * no need to allocate any new directory blocks, then it will contain * a pointer to the directory entry and the bh in which it resides. We * can use that without having to repeat the search. If there was no * free space, then we must now create more space. * * Returns: 0 on success, error code on failure */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, const struct gfs2_inode *nip, struct gfs2_diradd *da) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh = da->bh; struct gfs2_dirent *dent = da->dent; struct timespec64 tv; struct gfs2_leaf *leaf; int error; while(1) { if (da->bh == NULL) { dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); } if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); tv = inode_set_ctime_current(&ip->i_inode); if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); leaf->lf_sec = cpu_to_be64(tv.tv_sec); } da->dent = NULL; da->bh = NULL; brelse(bh); ip->i_entries++; inode_set_mtime_to_ts(&ip->i_inode, tv); if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); mark_inode_dirty(inode); error = 0; break; } if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) { error = dir_make_exhash(inode); if (error) break; continue; } error = dir_split_leaf(inode, name); if (error == 0) continue; if (error < 0) break; if (ip->i_depth < GFS2_DIR_MAX_DEPTH) { error = dir_double_exhash(ip); if (error) break; error = dir_split_leaf(inode, name); if (error < 0) break; if (error == 0) continue; } error = dir_new_leaf(inode, name); if (!error) continue; error = -ENOSPC; break; } return error; } /** * gfs2_dir_del - Delete a directory entry * @dip: The GFS2 inode * @dentry: The directory entry we want to delete * * Returns: 0 on success, error code on failure */ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) { const struct qstr *name = &dentry->d_name; struct 
gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; struct timespec64 tv; /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh); if (!dent) { gfs2_consist_inode(dip); return -EIO; } if (IS_ERR(dent)) { gfs2_consist_inode(dip); return PTR_ERR(dent); } /* If not first in block, adjust pointers accordingly */ if (gfs2_dirent_find(dent, name, NULL) == 0) { prev = dent; dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len)); } dirent_del(dip, bh, prev, dent); tv = inode_set_ctime_current(&dip->i_inode); if (dip->i_diskflags & GFS2_DIF_EXHASH) { struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; u16 entries = be16_to_cpu(leaf->lf_entries); if (!entries) gfs2_consist_inode(dip); leaf->lf_entries = cpu_to_be16(--entries); leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); leaf->lf_sec = cpu_to_be64(tv.tv_sec); } brelse(bh); if (!dip->i_entries) gfs2_consist_inode(dip); dip->i_entries--; inode_set_mtime_to_ts(&dip->i_inode, tv); if (d_is_dir(dentry)) drop_nlink(&dip->i_inode); mark_inode_dirty(&dip->i_inode); return 0; } /** * gfs2_dir_mvino - Change inode number of directory entry * @dip: The GFS2 directory inode * @filename: the filename to be moved * @nip: the new GFS2 inode * @new_type: the de_type of the new dirent * * This routine changes the inode number of a directory entry. It's used * by rename to change ".." when a directory is moved. * Assumes a glock is held on dvp. * * Returns: errno */ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, const struct gfs2_inode *nip, unsigned int new_type) { struct buffer_head *bh; struct gfs2_dirent *dent; dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh); if (!dent) { gfs2_consist_inode(dip); return -EIO; } if (IS_ERR(dent)) return PTR_ERR(dent); gfs2_trans_add_meta(dip->i_gl, bh); gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(new_type); brelse(bh); inode_set_mtime_to_ts(&dip->i_inode, inode_set_ctime_current(&dip->i_inode)); mark_inode_dirty_sync(&dip->i_inode); return 0; } /** * leaf_dealloc - Deallocate a directory leaf * @dip: the directory * @index: the hash table offset in the directory * @len: the number of pointers to this leaf * @leaf_no: the leaf number * @leaf_bh: buffer_head for the starting leaf * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0 * * Returns: errno */ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, u64 leaf_no, struct buffer_head *leaf_bh, int last_dealloc) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_leaf *tmp_leaf; struct gfs2_rgrp_list rlist; struct buffer_head *bh, *dibh; u64 blk, nblk; unsigned int rg_blocks = 0, l_blocks = 0; char *ht; unsigned int x, size = len * sizeof(u64); int error; error = gfs2_rindex_update(sdp); if (error) return error; memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); if (ht == NULL) ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO); if (!ht) return -ENOMEM; error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) goto out; /* Count the number of leaves */ bh = leaf_bh; for (blk = leaf_no; blk; blk = nblk) { if (blk != leaf_no) { error = get_leaf(dip, blk, &bh); if (error) goto out_rlist; } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); if (blk != leaf_no) brelse(bh); gfs2_rlist_add(dip, &rlist, blk); l_blocks++; } gfs2_rlist_alloc(&rlist, 
LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE); for (x = 0; x < rlist.rl_rgrps; x++) { struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl); rg_blocks += rgd->rd_length; } error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); if (error) goto out_rlist; error = gfs2_trans_begin(sdp, rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE + l_blocks); if (error) goto out_rg_gunlock; bh = leaf_bh; for (blk = leaf_no; blk; blk = nblk) { struct gfs2_rgrpd *rgd; if (blk != leaf_no) { error = get_leaf(dip, blk, &bh); if (error) goto out_end_trans; } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); if (blk != leaf_no) brelse(bh); rgd = gfs2_blk2rgrpd(sdp, blk, true); gfs2_free_meta(dip, rgd, blk, 1); gfs2_add_inode_blocks(&dip->i_inode, -1); } error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); if (error != size) { if (error >= 0) error = -EIO; goto out_end_trans; } error = gfs2_meta_inode_buffer(dip, &dibh); if (error) goto out_end_trans; gfs2_trans_add_meta(dip->i_gl, dibh); /* On the last dealloc, make this a regular file in case we crash. (We don't want to free these blocks a second time.) */ if (last_dealloc) dip->i_inode.i_mode = S_IFREG; gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_end_trans: gfs2_trans_end(sdp); out_rg_gunlock: gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out: kvfree(ht); return error; } /** * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory * @dip: the directory * * Dealloc all on-disk directory leaves to FREEMETA state * Change on-disk inode type to "regular file" * * Returns: errno */ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { struct buffer_head *bh; struct gfs2_leaf *leaf; u32 hsize, len; u32 index = 0, next_index; __be64 *lp; u64 leaf_no; int error = 0, last; hsize = BIT(dip->i_depth); lp = gfs2_dir_get_hash_table(dip); if (IS_ERR(lp)) return PTR_ERR(lp); while (index < hsize) { leaf_no = be64_to_cpu(lp[index]); if (leaf_no) { error = get_leaf(dip, leaf_no, &bh); if (error) goto out; leaf = (struct gfs2_leaf *)bh->b_data; len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth)); next_index = (index & ~(len - 1)) + len; last = ((next_index >= hsize) ? 1 : 0); error = leaf_dealloc(dip, index, len, leaf_no, bh, last); brelse(bh); if (error) goto out; index = next_index; } else index++; } if (index != hsize) { gfs2_consist_inode(dip); error = -EIO; } out: return error; } /** * gfs2_diradd_alloc_required - find if adding entry will require an allocation * @inode: the directory inode being written to * @name: the filename that's going to be added * @da: The structure to return dir alloc info * * Returns: 0 if ok, -ve on error */ int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, struct gfs2_diradd *da) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); struct gfs2_dirent *dent; struct buffer_head *bh; da->nr_blocks = 0; da->bh = NULL; da->dent = NULL; dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); if (!dent) { da->nr_blocks = sdp->sd_max_dirres; if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && (GFS2_DIRENT_SIZE(name->len) < extra)) da->nr_blocks = 1; return 0; } if (IS_ERR(dent)) return PTR_ERR(dent); if (da->save_loc) { da->bh = bh; da->dent = dent; } else { brelse(bh); } return 0; }
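
The directory code above derives everything from the 32-bit name hash: the exhash table index is the top i_depth bits (index = name->hash >> (32 - i_depth)), and readdir cookies are built by gfs2_disk_hash2offset()/gfs2_set_cookies(). A minimal userspace sketch of that packing, with the constants copied from the code and all sample values invented:

#include <stdint.h>
#include <stdio.h>

#define GFS2_HASH_INDEX_MASK 0xffffc000	/* top 18 bits: hash part */
#define GFS2_USE_HASH_FLAG   0x2000	/* bit 13: fall back to hash cookie */

/* mirror gfs2_disk_hash2offset()/gfs2_dir_offset2hash() from the code above */
static uint64_t disk_hash2offset(uint32_t h) { return (uint64_t)h >> 1; }
static uint32_t offset2hash(uint64_t p)      { return (uint32_t)(p << 1); }

int main(void)
{
	uint32_t hash = 0xdeadbeef;	/* de_hash of some entry (made up) */
	unsigned depth = 10;		/* dip->i_depth */
	uint32_t leaf_nr = 3, slot = 42;
	uint32_t max_dents = 120;	/* sdp->sd_max_dents_per_leaf, fs-specific */
	uint32_t offset = slot + leaf_nr * max_dents;

	/* hash table index used by gfs2_dirent_search() and friends */
	uint32_t index = hash >> (32 - depth);

	/* location-based cookie as in gfs2_set_cookies() */
	uint32_t cookie = (uint32_t)disk_hash2offset(hash);
	if (offset >= GFS2_USE_HASH_FLAG || leaf_nr >= GFS2_USE_HASH_FLAG)
		cookie |= GFS2_USE_HASH_FLAG;
	else
		cookie = (cookie & GFS2_HASH_INDEX_MASK) | offset;

	printf("index=%u cookie=%#x hash-part=%#x\n",
	       index, cookie, offset2hash(cookie));
	return 0;
}

With ar_loccookie set, the low 13 bits carry the entry's position, bit 13 is GFS2_USE_HASH_FLAG, and the top 18 bits keep enough of the hash to locate the leaf again on the next readdir call.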
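In the same spirit, the in-block allocation helpers (gfs2_dirent_find_space(), gfs2_init_dirent(), do_init_dirent(), dirent_del()) are pure record-length arithmetic: each dirent owns de_rec_len bytes, of which it actually uses GFS2_DIRENT_SIZE(name_len), and a new entry fits if the slack covers its rounded size. A toy version, assuming a 40-byte on-disk dirent header and the 8-byte alignment that gfs2_check_dirent() enforces:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins: the kernel's GFS2_DIRENT_SIZE() rounds the
 * on-disk header plus the name up to an 8-byte boundary. */
#define DIRENT_HDR 40
#define DIRENT_SIZE(name_len) ((DIRENT_HDR + (name_len) + 7) & ~7u)

/* Mirrors gfs2_dirent_find_space(): a sentinel (deleted) entry uses none
 * of its record, a live one uses DIRENT_SIZE() of its own name. */
static int fits(unsigned rec_len, unsigned cur_name_len, int sentinel,
		unsigned new_name_len)
{
	unsigned required = DIRENT_SIZE(new_name_len);
	unsigned actual = sentinel ? 0 : DIRENT_SIZE(cur_name_len);

	return rec_len - actual >= required;
}

int main(void)
{
	/* A 128-byte record holding "foo" (48 bytes used) leaves 80 bytes,
	 * enough for "newfile" (48 bytes): do_init_dirent() would shrink
	 * the old record to 48 and hand the new entry the remaining 80. */
	printf("fits: %d\n", fits(128, 3, 0, 7));
	return 0;
}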
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 * Copyright (c) Nokia Corporation, 2006, 2007
 *
 * Author: Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * UBI input/output sub-system.
 *
 * This sub-system provides a uniform way to work with all kinds of underlying
 * MTD devices. It also implements handy functions for reading and writing UBI
 * headers.
 *
 * We are trying to have a paranoid mindset and not to trust what we read from
 * the flash media, in order to be more secure and robust. So this sub-system
 * validates every single header it reads from the flash media.
 *
 * Some words about how the eraseblock headers are stored.
 *
 * The erase counter header is always stored at offset zero. By default, the
 * VID header is stored after the EC header at the closest aligned offset
 * (i.e., aligned to the minimum I/O unit size). Data starts next to the VID
 * header at the closest aligned offset. But this default layout may be
 * changed. For example, for different reasons (e.g., optimization) UBI may be
 * asked to put the VID header at a further offset, and even at an unaligned
 * offset. Of course, if the offset of the VID header is unaligned, UBI adds
 * proper padding in front of it. The data offset may also be changed, but it
 * has to be aligned.
 *
 * About minimal I/O units. In general, UBI assumes a flash device model where
 * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1,
 * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the
 * @ubi->mtd->writesize field. But as an exception, UBI admits use of another
 * (smaller) minimal I/O unit size for EC and VID headers to make it possible
 * to do different optimizations.
 *
 * This is extremely useful in case of NAND flashes which allow several write
 * operations to one NAND page. In this case UBI can fit the EC and VID
 * headers in one NAND page. Thus, UBI may use the "sub-page" size as the
 * minimal I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it
 * still reports the NAND page size (@ubi->min_io_size) as the minimal I/O
 * unit to UBI users.
 *
 * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so
 * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID
 * headers.
 *
 * Q: why not just treat the sub-page as the minimal I/O unit of this flash
 * device, e.g., make @ubi->min_io_size = 512 in the example above?
 *
 * A: because when writing a sub-page, MTD still writes a full 2K page but the
 * bytes which are not relevant to the sub-page are 0xFF. So, basically,
 * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page.
 * Thus, we prefer to use sub-pages only for EC and VID headers.
 *
 * As noted above, the VID header may start at a non-aligned offset. For
 * example, in case of a 2KiB page NAND flash with 512-byte sub-pages, the VID
 * header may reside at offset 1984, which is the last 64 bytes of the last
 * sub-page (the EC header is always at offset zero). This causes some
 * difficulties when reading and writing VID headers.
 *
 * Suppose we have a 64-byte buffer and we read a VID header into it. We
 * change the data and want to write this VID header out. As we can only write
 * in 512-byte chunks, we have to allocate one more buffer and copy our VID
 * header to offset 448 of this buffer.
 *
 * The I/O sub-system does the following trick in order to avoid this extra
 * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
 * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
 * When the VID header is being written out, it shifts the VID header pointer
 * back and writes the whole sub-page.
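 *
 * (A worked example of that arithmetic, using the numbers above and added
 * here for illustration: with 512-byte sub-pages and the VID header at
 * offset 1984, the aligned offset is 1984 & ~511 = 1536, so the shift is
 * 1984 - 1536 = 448. UBI hands out buf + 448 as the VID header pointer and,
 * on write, steps back to buf and writes the whole 512-byte sub-page
 * starting at flash offset 1536.)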
*/ #include <linux/crc32.h> #include <linux/err.h> #include <linux/slab.h> #include "ubi.h" static int self_check_not_bad(const struct ubi_device *ubi, int pnum); static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_ec_hdr *ec_hdr); static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_vid_hdr *vid_hdr); static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len); /** * ubi_io_read - read data from a physical eraseblock. * @ubi: UBI device description object * @buf: buffer where to store the read data * @pnum: physical eraseblock number to read from * @offset: offset within the physical eraseblock from where to read * @len: how many bytes to read * * This function reads data from offset @offset of physical eraseblock @pnum * and stores the read data in the @buf buffer. The following return codes are * possible: * * o %0 if all the requested data were successfully read; * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but * correctable bit-flips were detected; this is harmless but may indicate * that this eraseblock may become bad soon (but do not have to); * o %-EBADMSG if the MTD subsystem reported about data integrity problems, for * example it can be an ECC error in case of NAND; this most probably means * that the data is corrupted; * o %-EIO if some I/O error occurred; * o other negative error codes in case of other errors. */ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, int len) { int err, retries = 0; size_t read; loff_t addr; dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); ubi_assert(len > 0); err = self_check_not_bad(ubi, pnum); if (err) return err; /* * Deliberately corrupt the buffer to improve robustness. Indeed, if we * do not do this, the following may happen: * 1. The buffer contains data from previous operation, e.g., read from * another PEB previously. The data looks like expected, e.g., if we * just do not read anything and return - the caller would not * notice this. E.g., if we are reading a VID header, the buffer may * contain a valid VID header from another PEB. * 2. The driver is buggy and returns us success or -EBADMSG or * -EUCLEAN, but it does not actually put any data to the buffer. * * This may confuse UBI or upper layers - they may think the buffer * contains valid data while in fact it is just old data. This is * especially possible because UBI (and UBIFS) relies on CRC, and * treats data as correct even in case of ECC errors if the CRC is * correct. * * Try to prevent this situation by changing the first byte of the * buffer. */ *((uint8_t *)buf) ^= 0xFF; addr = (loff_t)pnum * ubi->peb_size + offset; retry: err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err) { const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; if (mtd_is_bitflip(err)) { /* * -EUCLEAN is reported if there was a bit-flip which * was corrected, so this is harmless. * * We do not report about it here unless debugging is * enabled. A corresponding message will be printed * later, when it is has been scrubbed. 
*/ ubi_msg(ubi, "fixable bit-flip detected at PEB %d", pnum); ubi_assert(len == read); return UBI_IO_BITFLIPS; } if (retries++ < UBI_IO_RETRIES) { ubi_warn(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read only %zd bytes, retry", err, errstr, len, pnum, offset, read); yield(); goto retry; } ubi_err(ubi, "error %d%s while reading %d bytes from PEB %d:%d, read %zd bytes", err, errstr, len, pnum, offset, read); dump_stack(); /* * The driver should never return -EBADMSG if it failed to read * all the requested data. But some buggy drivers might do * this, so we change it to -EIO. */ if (read != len && mtd_is_eccerr(err)) { ubi_assert(0); err = -EIO; } } else { ubi_assert(len == read); if (ubi_dbg_is_bitflip(ubi)) { dbg_gen("bit-flip (emulated)"); return UBI_IO_BITFLIPS; } if (ubi_dbg_is_read_failure(ubi, MASK_READ_FAILURE)) { ubi_warn(ubi, "cannot read %d bytes from PEB %d:%d (emulated)", len, pnum, offset); return -EIO; } if (ubi_dbg_is_eccerr(ubi)) { ubi_warn(ubi, "ECC error (emulated) while reading %d bytes from PEB %d:%d, read %zd bytes", len, pnum, offset, read); return -EBADMSG; } } return err; } /** * ubi_io_write - write data to a physical eraseblock. * @ubi: UBI device description object * @buf: buffer with the data to write * @pnum: physical eraseblock number to write to * @offset: offset within the physical eraseblock where to write * @len: how many bytes to write * * This function writes @len bytes of data from buffer @buf to offset @offset * of physical eraseblock @pnum. If all the data were successfully written, * zero is returned. If an error occurred, this function returns a negative * error code. If %-EIO is returned, the physical eraseblock most probably went * bad. * * Note, in case of an error, it is possible that something was still written * to the flash media, but may be some garbage. */ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { int err; size_t written; loff_t addr; dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); ubi_assert(offset % ubi->hdrs_min_io_size == 0); ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } err = self_check_not_bad(ubi, pnum); if (err) return err; /* The area we are writing to has to contain all 0xFF bytes */ err = ubi_self_check_all_ff(ubi, pnum, offset, len); if (err) return err; if (offset >= ubi->leb_start) { /* * We write to the data area of the physical eraseblock. Make * sure it has valid EC and VID headers. */ err = self_check_peb_ec_hdr(ubi, pnum); if (err) return err; err = self_check_peb_vid_hdr(ubi, pnum); if (err) return err; } if (ubi_dbg_is_write_failure(ubi)) { ubi_err(ubi, "cannot write %d bytes to PEB %d:%d (emulated)", len, pnum, offset); dump_stack(); return -EIO; } addr = (loff_t)pnum * ubi->peb_size + offset; err = mtd_write(ubi->mtd, addr, len, &written, buf); if (err) { ubi_err(ubi, "error %d while writing %d bytes to PEB %d:%d, written %zd bytes", err, len, pnum, offset, written); dump_stack(); ubi_dump_flash(ubi, pnum, offset, len); } else ubi_assert(written == len); if (!err) { err = self_check_write(ubi, buf, pnum, offset, len); if (err) return err; /* * Since we always write sequentially, the rest of the PEB has * to contain only 0xFF bytes. 
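	 *
	 * (For example, and purely as illustration: after writing 4096
	 * bytes at offset 4096 of a 128 KiB PEB, the check below re-reads
	 * the remaining 131072 - 8192 = 122880 bytes and verifies that
	 * they still read back as all 0xFF.)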
*/ offset += len; len = ubi->peb_size - offset; if (len) err = ubi_self_check_all_ff(ubi, pnum, offset, len); } return err; } /** * do_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @pnum: the physical eraseblock number to erase * * This function synchronously erases physical eraseblock @pnum and returns * zero in case of success and a negative error code in case of failure. If * %-EIO is returned, the physical eraseblock most probably went bad. */ static int do_sync_erase(struct ubi_device *ubi, int pnum) { int err, retries = 0; struct erase_info ei; dbg_io("erase PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } retry: memset(&ei, 0, sizeof(struct erase_info)); ei.addr = (loff_t)pnum * ubi->peb_size; ei.len = ubi->peb_size; err = mtd_erase(ubi->mtd, &ei); if (err) { if (retries++ < UBI_IO_RETRIES) { ubi_warn(ubi, "error %d while erasing PEB %d, retry", err, pnum); yield(); goto retry; } ubi_err(ubi, "cannot erase PEB %d, error %d", pnum, err); dump_stack(); return err; } err = ubi_self_check_all_ff(ubi, pnum, 0, ubi->peb_size); if (err) return err; if (ubi_dbg_is_erase_failure(ubi)) { ubi_err(ubi, "cannot erase PEB %d (emulated)", pnum); return -EIO; } return 0; } /* Patterns to write to a physical eraseblock when torturing it */ static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; /** * torture_peb - test a supposedly bad physical eraseblock. * @ubi: UBI device description object * @pnum: the physical eraseblock number to test * * This function returns %-EIO if the physical eraseblock did not pass the * test, a positive number of erase operations done if the test was * successfully passed, and other negative error codes in case of other errors. */ static int torture_peb(struct ubi_device *ubi, int pnum) { int err, i, patt_count; ubi_msg(ubi, "run torture test for PEB %d", pnum); patt_count = ARRAY_SIZE(patterns); ubi_assert(patt_count > 0); mutex_lock(&ubi->buf_mutex); for (i = 0; i < patt_count; i++) { err = do_sync_erase(ubi, pnum); if (err) goto out; /* Make sure the PEB contains only 0xFF bytes */ err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); if (err == 0) { ubi_err(ubi, "erased PEB %d, but a non-0xFF byte found", pnum); err = -EIO; goto out; } /* Write a pattern and check it */ memset(ubi->peb_buf, patterns[i], ubi->peb_size); err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); if (err) goto out; err = ubi_check_pattern(ubi->peb_buf, patterns[i], ubi->peb_size); if (err == 0) { ubi_err(ubi, "pattern %x checking failed for PEB %d", patterns[i], pnum); err = -EIO; goto out; } } err = patt_count; ubi_msg(ubi, "PEB %d passed torture test, do not mark it as bad", pnum); out: mutex_unlock(&ubi->buf_mutex); if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { /* * If a bit-flip or data integrity error was detected, the test * has not passed because it happened on a freshly erased * physical eraseblock which means something is wrong with it. */ ubi_err(ubi, "read problems on freshly erased PEB %d, must be bad", pnum); err = -EIO; } return err; } /** * nor_erase_prepare - prepare a NOR flash PEB for erasure. 
* @ubi: UBI device description object * @pnum: physical eraseblock number to prepare * * NOR flashes, or at least some of them, have a peculiar embedded PEB erasure * algorithm: the PEB is first filled with zeroes, then it is erased. And * filling with zeroes starts from the end of the PEB. This was observed with * Spansion S29GL512N NOR flash. * * This means that in case of a power cut we may end up with intact data at the * beginning of the PEB, and all zeroes at the end of the PEB. In other words, the * EC and VID headers are OK, but a large chunk of data at the end of the PEB is * zeroed. This makes UBI mistakenly treat this PEB as used and associate it * with an LEB, which leads to subsequent failures (e.g., UBIFS fails). * * This function is called before erasing NOR PEBs and it zeroes out EC and VID * magic numbers in order to invalidate them and prevent these failures. Returns * zero in case of success and a negative error code in case of failure. */ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) { int err; size_t written; loff_t addr; uint32_t data = 0; struct ubi_ec_hdr ec_hdr; struct ubi_vid_io_buf vidb; /* * Note, we cannot generally define VID header buffers on stack, * because of the way we deal with these buffers (see the header * comment in this file). But we know this is a NOR-specific piece of * code, so we can do this. But yes, this is error-prone and we should * (pre-)allocate a VID header buffer instead. */ struct ubi_vid_hdr vid_hdr; /* * If VID or EC is valid, we have to corrupt them before erasing. * It is important to first invalidate the EC header, and then the VID * header. Otherwise a power cut may lead to valid EC header and * invalid VID header, in which case UBI will treat this PEB as * corrupted and will try to preserve it, and print scary warnings. */ addr = (loff_t)pnum * ubi->peb_size; err = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && err != UBI_IO_FF) { err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if (err) goto error; } ubi_init_vid_buf(ubi, &vidb, &vid_hdr); ubi_assert(&vid_hdr == ubi_get_vid_hdr(&vidb)); err = ubi_io_read_vid_hdr(ubi, pnum, &vidb, 0); if (err != UBI_IO_BAD_HDR_EBADMSG && err != UBI_IO_BAD_HDR && err != UBI_IO_FF) { addr += ubi->vid_hdr_aloffset; err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data); if (err) goto error; } return 0; error: /* * The PEB contains a valid VID or EC header, but we cannot invalidate * it. Supposedly the flash media or the driver is screwed up, so * return an error. */ ubi_err(ubi, "cannot invalidate PEB %d, write returned %d", pnum, err); ubi_dump_flash(ubi, pnum, 0, ubi->peb_size); return -EIO; } /** * ubi_io_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @pnum: physical eraseblock number to erase * @torture: if this physical eraseblock has to be tortured * * This function synchronously erases physical eraseblock @pnum. If @torture * flag is not zero, the physical eraseblock is checked by means of writing * different patterns to it and reading them back. If the torturing is enabled, * the physical eraseblock is erased more than once. * * This function returns the number of erasures made in case of success, %-EIO * if the erasure failed or the torturing test failed, and other negative error * codes in case of other errors. Note, %-EIO means that the physical * eraseblock is bad.
*/ int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture) { int err, ret = 0; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); err = self_check_not_bad(ubi, pnum); if (err != 0) return err; if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } /* * If the flash is ECC-ed then we have to erase the ECC block before we * can write to it. But the write is in preparation to an erase in the * first place. This means we cannot zero out EC and VID before the * erase and we just have to hope the flash starts erasing from the * start of the page. */ if (ubi->nor_flash && ubi->mtd->writesize == 1) { err = nor_erase_prepare(ubi, pnum); if (err) return err; } if (torture) { ret = torture_peb(ubi, pnum); if (ret < 0) return ret; } err = do_sync_erase(ubi, pnum); if (err) return err; return ret + 1; } /** * ubi_io_is_bad - check if a physical eraseblock is bad. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns a positive number if the physical eraseblock is bad, * zero if not, and a negative error code if an error occurred. */ int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) { struct mtd_info *mtd = ubi->mtd; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->bad_allowed) { int ret; ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size); if (ret < 0) ubi_err(ubi, "error %d while checking if PEB %d is bad", ret, pnum); else if (ret) dbg_io("PEB %d is bad", pnum); return ret; } return 0; } /** * ubi_io_mark_bad - mark a physical eraseblock as bad. * @ubi: UBI device description object * @pnum: the physical eraseblock number to mark * * This function returns zero in case of success and a negative error code in * case of failure. */ int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) { int err; struct mtd_info *mtd = ubi->mtd; ubi_assert(pnum >= 0 && pnum < ubi->peb_count); if (ubi->ro_mode) { ubi_err(ubi, "read-only mode"); return -EROFS; } if (!ubi->bad_allowed) return 0; err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size); if (err) ubi_err(ubi, "cannot mark PEB %d bad, error %d", pnum, err); return err; } /** * validate_ec_hdr - validate an erase counter header. * @ubi: UBI device description object * @ec_hdr: the erase counter header to check * * This function returns zero if the erase counter header is OK, and %1 if * not. */ static int validate_ec_hdr(const struct ubi_device *ubi, const struct ubi_ec_hdr *ec_hdr) { long long ec; int vid_hdr_offset, leb_start; ec = be64_to_cpu(ec_hdr->ec); vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); leb_start = be32_to_cpu(ec_hdr->data_offset); if (ec_hdr->version != UBI_VERSION) { ubi_err(ubi, "node with incompatible UBI version found: this UBI version is %d, image version is %d", UBI_VERSION, (int)ec_hdr->version); goto bad; } if (vid_hdr_offset != ubi->vid_hdr_offset) { ubi_err(ubi, "bad VID header offset %d, expected %d", vid_hdr_offset, ubi->vid_hdr_offset); goto bad; } if (leb_start != ubi->leb_start) { ubi_err(ubi, "bad data offset %d, expected %d", leb_start, ubi->leb_start); goto bad; } if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) { ubi_err(ubi, "bad erase counter %lld", ec); goto bad; } return 0; bad: ubi_err(ubi, "bad EC header"); ubi_dump_ec_hdr(ec_hdr); dump_stack(); return 1; } /** * ubi_io_read_ec_hdr - read and check an erase counter header. 
* @ubi: UBI device description object * @pnum: physical eraseblock to read from * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter * header * @verbose: be verbose if the header is corrupted or was not found * * This function reads the erase counter header from physical eraseblock @pnum * and stores it in @ec_hdr. It also checks the CRC checksum of the read * erase counter header. The following codes may be returned: * * o %0 if the CRC checksum is correct and the header was successfully read; * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected * and corrected by the flash driver; this is harmless but may indicate that * this eraseblock may become bad soon (though it may not); * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was * a data integrity error (uncorrectable ECC error in case of NAND); * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty); * o a negative error code in case of failure. */ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr, int verbose) { int err, read_err; uint32_t crc, magic, hdr_crc; dbg_io("read EC header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); if (read_err) { if (read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; /* * We read all the data, but either a correctable bit-flip * occurred, or MTD reported a data integrity error * (uncorrectable ECC error in case of NAND). The former is * harmless, the latter may mean that the read data is * corrupted. But we have a CRC checksum and we will detect * this. If the EC header is still OK, we just report this as * a bit-flip, to force scrubbing. */ } magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { if (mtd_is_eccerr(read_err)) return UBI_IO_BAD_HDR_EBADMSG; /* * The magic field is wrong. Let's check if we have read all * 0xFF. If yes, this physical eraseblock is assumed to be * empty. */ if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { /* The physical eraseblock is supposedly empty */ if (verbose) ubi_warn(ubi, "no EC header found at PEB %d, only 0xFF bytes", pnum); dbg_bld("no EC header found at PEB %d, only 0xFF bytes", pnum); if (!read_err) return UBI_IO_FF; else return UBI_IO_FF_BITFLIPS; } /* * This is not a valid erase counter header, and these are not * 0xFF bytes. Report that the header is corrupted. */ if (verbose) { ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_EC_HDR_MAGIC); ubi_dump_ec_hdr(ec_hdr); } dbg_bld("bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_EC_HDR_MAGIC); return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { ubi_warn(ubi, "bad EC header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_dump_ec_hdr(ec_hdr); } dbg_bld("bad EC header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); if (!read_err) return UBI_IO_BAD_HDR; else return UBI_IO_BAD_HDR_EBADMSG; } /* And of course validate what has just been read from the media */ err = validate_ec_hdr(ubi, ec_hdr); if (err) { ubi_err(ubi, "validation failed for PEB %d", pnum); return -EINVAL; } /* * If there was %-EBADMSG, but the header CRC is still OK, report * a bit-flip to force scrubbing on this PEB.
*/ if (read_err) return UBI_IO_BITFLIPS; if (ubi_dbg_is_read_failure(ubi, MASK_READ_FAILURE_EC)) { ubi_warn(ubi, "cannot read EC header from PEB %d (emulated)", pnum); return -EIO; } if (ubi_dbg_is_ff(ubi, MASK_IO_FF_EC)) { ubi_warn(ubi, "bit-all-ff (emulated)"); return UBI_IO_FF; } if (ubi_dbg_is_ff_bitflips(ubi, MASK_IO_FF_BITFLIPS_EC)) { ubi_warn(ubi, "bit-all-ff with error reported by MTD driver (emulated)"); return UBI_IO_FF_BITFLIPS; } if (ubi_dbg_is_bad_hdr(ubi, MASK_BAD_HDR_EC)) { ubi_warn(ubi, "bad_hdr (emulated)"); return UBI_IO_BAD_HDR; } if (ubi_dbg_is_bad_hdr_ebadmsg(ubi, MASK_BAD_HDR_EBADMSG_EC)) { ubi_warn(ubi, "bad_hdr with ECC error (emulated)"); return UBI_IO_BAD_HDR_EBADMSG; } return 0; } /** * ubi_io_write_ec_hdr - write an erase counter header. * @ubi: UBI device description object * @pnum: physical eraseblock to write to * @ec_hdr: the erase counter header to write * * This function writes the erase counter header described by @ec_hdr to physical * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so * the caller does not have to fill them. Callers must only fill the @ec_hdr->ec * field. * * This function returns zero in case of success and a negative error code in * case of failure. If %-EIO is returned, the physical eraseblock most probably * went bad. */ int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum, struct ubi_ec_hdr *ec_hdr) { int err; uint32_t crc; dbg_io("write EC header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC); ec_hdr->version = UBI_VERSION; ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); ec_hdr->image_seq = cpu_to_be32(ubi->image_seq); crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); ec_hdr->hdr_crc = cpu_to_be32(crc); err = self_check_ec_hdr(ubi, pnum, ec_hdr); if (err) return err; if (ubi_dbg_is_power_cut(ubi, MASK_POWER_CUT_EC)) { ubi_warn(ubi, "emulating a power cut when writing EC header"); ubi_ro_mode(ubi); return -EROFS; } err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); return err; } /** * validate_vid_hdr - validate a volume identifier header. * @ubi: UBI device description object * @vid_hdr: the volume identifier header to check * * This function checks the data stored in the volume identifier header * @vid_hdr. Returns zero if the VID header is OK and %1 if not.
*/ static int validate_vid_hdr(const struct ubi_device *ubi, const struct ubi_vid_hdr *vid_hdr) { int vol_type = vid_hdr->vol_type; int copy_flag = vid_hdr->copy_flag; int vol_id = be32_to_cpu(vid_hdr->vol_id); int lnum = be32_to_cpu(vid_hdr->lnum); int compat = vid_hdr->compat; int data_size = be32_to_cpu(vid_hdr->data_size); int used_ebs = be32_to_cpu(vid_hdr->used_ebs); int data_pad = be32_to_cpu(vid_hdr->data_pad); int data_crc = be32_to_cpu(vid_hdr->data_crc); int usable_leb_size = ubi->leb_size - data_pad; if (copy_flag != 0 && copy_flag != 1) { ubi_err(ubi, "bad copy_flag"); goto bad; } if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || data_pad < 0) { ubi_err(ubi, "negative values"); goto bad; } if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { ubi_err(ubi, "bad vol_id"); goto bad; } if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { ubi_err(ubi, "bad compat"); goto bad; } if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && compat != UBI_COMPAT_REJECT) { ubi_err(ubi, "bad compat"); goto bad; } if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { ubi_err(ubi, "bad vol_type"); goto bad; } if (data_pad >= ubi->leb_size / 2) { ubi_err(ubi, "bad data_pad"); goto bad; } if (data_size > ubi->leb_size) { ubi_err(ubi, "bad data_size"); goto bad; } if (vol_type == UBI_VID_STATIC) { /* * Although from a high-level point of view static volumes may * contain zero bytes of data, no VID header can contain * zero in these fields, because empty volumes do not have * mapped logical eraseblocks. */ if (used_ebs == 0) { ubi_err(ubi, "zero used_ebs"); goto bad; } if (data_size == 0) { ubi_err(ubi, "zero data_size"); goto bad; } if (lnum < used_ebs - 1) { if (data_size != usable_leb_size) { ubi_err(ubi, "bad data_size"); goto bad; } } else if (lnum > used_ebs - 1) { ubi_err(ubi, "too high lnum"); goto bad; } } else { if (copy_flag == 0) { if (data_crc != 0) { ubi_err(ubi, "non-zero data CRC"); goto bad; } if (data_size != 0) { ubi_err(ubi, "non-zero data_size"); goto bad; } } else { if (data_size == 0) { ubi_err(ubi, "zero data_size of copy"); goto bad; } } if (used_ebs != 0) { ubi_err(ubi, "bad used_ebs"); goto bad; } } return 0; bad: ubi_err(ubi, "bad VID header"); ubi_dump_vid_hdr(vid_hdr); dump_stack(); return 1; } /** * ubi_io_read_vid_hdr - read and check a volume identifier header. * @ubi: UBI device description object * @pnum: physical eraseblock number to read from * @vidb: the volume identifier buffer to store data in * @verbose: be verbose if the header is corrupted or wasn't found * * This function reads the volume identifier header from physical eraseblock * @pnum and stores it in @vidb. It also checks the CRC checksum of the read * volume identifier header. The error codes are the same as in * 'ubi_io_read_ec_hdr()'. * * Note, the implementation of this function is also very similar to * 'ubi_io_read_ec_hdr()', so refer to the commentaries in 'ubi_io_read_ec_hdr()'.
*/ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_io_buf *vidb, int verbose) { int err, read_err; uint32_t crc, magic, hdr_crc; struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); void *p = vidb->buffer; dbg_io("read VID header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_shift + UBI_VID_HDR_SIZE); if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) return read_err; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { if (mtd_is_eccerr(read_err)) return UBI_IO_BAD_HDR_EBADMSG; if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { if (verbose) ubi_warn(ubi, "no VID header found at PEB %d, only 0xFF bytes", pnum); dbg_bld("no VID header found at PEB %d, only 0xFF bytes", pnum); if (!read_err) return UBI_IO_FF; else return UBI_IO_FF_BITFLIPS; } if (verbose) { ubi_warn(ubi, "bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_VID_HDR_MAGIC); ubi_dump_vid_hdr(vid_hdr); } dbg_bld("bad magic number at PEB %d: %08x instead of %08x", pnum, magic, UBI_VID_HDR_MAGIC); return UBI_IO_BAD_HDR; } crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { ubi_warn(ubi, "bad CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_dump_vid_hdr(vid_hdr); } dbg_bld("bad CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); if (!read_err) return UBI_IO_BAD_HDR; else return UBI_IO_BAD_HDR_EBADMSG; } err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err(ubi, "validation failed for PEB %d", pnum); return -EINVAL; } if (read_err) return UBI_IO_BITFLIPS; if (ubi_dbg_is_read_failure(ubi, MASK_READ_FAILURE_VID)) { ubi_warn(ubi, "cannot read VID header from PEB %d (emulated)", pnum); return -EIO; } if (ubi_dbg_is_ff(ubi, MASK_IO_FF_VID)) { ubi_warn(ubi, "bit-all-ff (emulated)"); return UBI_IO_FF; } if (ubi_dbg_is_ff_bitflips(ubi, MASK_IO_FF_BITFLIPS_VID)) { ubi_warn(ubi, "bit-all-ff with error reported by MTD driver (emulated)"); return UBI_IO_FF_BITFLIPS; } if (ubi_dbg_is_bad_hdr(ubi, MASK_BAD_HDR_VID)) { ubi_warn(ubi, "bad_hdr (emulated)"); return UBI_IO_BAD_HDR; } if (ubi_dbg_is_bad_hdr_ebadmsg(ubi, MASK_BAD_HDR_EBADMSG_VID)) { ubi_warn(ubi, "bad_hdr with ECC error (emulated)"); return UBI_IO_BAD_HDR_EBADMSG; } return 0; } /** * ubi_io_write_vid_hdr - write a volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to write to * @vidb: the volume identifier buffer to write * * This function writes the volume identifier header described by @vid_hdr to * physical eraseblock @pnum. This function automatically fills the * @vidb->hdr->magic and the @vidb->hdr->version fields, as well as calculates * header CRC checksum and stores it at vidb->hdr->hdr_crc. * * This function returns zero in case of success and a negative error code in * case of failure. If %-EIO is returned, the physical eraseblock probably went * bad. 
*/ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, struct ubi_vid_io_buf *vidb) { struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); int err; uint32_t crc; void *p = vidb->buffer; dbg_io("write VID header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); err = self_check_peb_ec_hdr(ubi, pnum); if (err) return err; vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); vid_hdr->hdr_crc = cpu_to_be32(crc); err = self_check_vid_hdr(ubi, pnum, vid_hdr); if (err) return err; if (ubi_dbg_is_power_cut(ubi, MASK_POWER_CUT_VID)) { ubi_warn(ubi, "emulating a power cut when writing VID header"); ubi_ro_mode(ubi); return -EROFS; } err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); return err; } /** * self_check_not_bad - ensure that a physical eraseblock is not bad. * @ubi: UBI device description object * @pnum: physical eraseblock number to check * * This function returns zero if the physical eraseblock is good, %-EINVAL if * it is bad and a negative error code if an error occurred. */ static int self_check_not_bad(const struct ubi_device *ubi, int pnum) { int err; if (!ubi_dbg_chk_io(ubi)) return 0; err = ubi_io_is_bad(ubi, pnum); if (!err) return err; ubi_err(ubi, "self-check failed for PEB %d", pnum); dump_stack(); return err > 0 ? -EINVAL : err; } /** * self_check_ec_hdr - check if an erase counter header is all right. * @ubi: UBI device description object * @pnum: physical eraseblock number the erase counter header belongs to * @ec_hdr: the erase counter header to check * * This function returns zero if the erase counter header contains valid * values, and %-EINVAL if not. */ static int self_check_ec_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_ec_hdr *ec_hdr) { int err; uint32_t magic; if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { ubi_err(ubi, "bad magic %#08x, must be %#08x", magic, UBI_EC_HDR_MAGIC); goto fail; } err = validate_ec_hdr(ubi, ec_hdr); if (err) { ubi_err(ubi, "self-check failed for PEB %d", pnum); goto fail; } return 0; fail: ubi_dump_ec_hdr(ec_hdr); dump_stack(); return -EINVAL; } /** * self_check_peb_ec_hdr - check erase counter header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns zero if the erase counter header is all right and * a negative error code if not or if an error occurred. */ static int self_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_ec_hdr *ec_hdr; if (!ubi_dbg_chk_io(ubi)) return 0; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err(ubi, "bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_ec_hdr(ec_hdr); dump_stack(); err = -EINVAL; goto exit; } err = self_check_ec_hdr(ubi, pnum, ec_hdr); exit: kfree(ec_hdr); return err; } /** * self_check_vid_hdr - check that a volume identifier header is all right. 
* @ubi: UBI device description object * @pnum: physical eraseblock number the volume identifier header belongs to * @vid_hdr: the volume identifier header to check * * This function returns zero if the volume identifier header is all right, and * %-EINVAL if not. */ static int self_check_vid_hdr(const struct ubi_device *ubi, int pnum, const struct ubi_vid_hdr *vid_hdr) { int err; uint32_t magic; if (!ubi_dbg_chk_io(ubi)) return 0; magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { ubi_err(ubi, "bad VID header magic %#08x at PEB %d, must be %#08x", magic, pnum, UBI_VID_HDR_MAGIC); goto fail; } err = validate_vid_hdr(ubi, vid_hdr); if (err) { ubi_err(ubi, "self-check failed for PEB %d", pnum); goto fail; } return err; fail: ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_vid_hdr(vid_hdr); dump_stack(); return -EINVAL; } /** * self_check_peb_vid_hdr - check volume identifier header. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * * This function returns zero if the volume identifier header is all right, * and a negative error code if not or if an error occurred. */ static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) { int err; uint32_t crc, hdr_crc; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; void *p; if (!ubi_dbg_chk_io(ubi)) return 0; vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); p = vidb->buffer; err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ubi->vid_hdr_alsize); if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) goto exit; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err(ubi, "bad VID header CRC at PEB %d, calculated %#08x, read %#08x", pnum, crc, hdr_crc); ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_dump_vid_hdr(vid_hdr); dump_stack(); err = -EINVAL; goto exit; } err = self_check_vid_hdr(ubi, pnum, vid_hdr); exit: ubi_free_vid_buf(vidb); return err; } /** * self_check_write - make sure write succeeded. * @ubi: UBI device description object * @buf: buffer with data which were written * @pnum: physical eraseblock number the data were written to * @offset: offset within the physical eraseblock the data were written to * @len: how many bytes were written * * This function reads back the data which were recently written and compares * them with the original data buffer - the data have to match. Returns zero if * the data match and a negative error code if not or in case of failure.
*/ static int self_check_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, int len) { int err, i; size_t read; void *buf1; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; if (!ubi_dbg_chk_io(ubi)) return 0; buf1 = __vmalloc(len, GFP_NOFS); if (!buf1) { ubi_err(ubi, "cannot allocate memory to check writes"); return 0; } err = mtd_read(ubi->mtd, addr, len, &read, buf1); if (err && !mtd_is_bitflip(err)) goto out_free; for (i = 0; i < len; i++) { uint8_t c = ((uint8_t *)buf)[i]; uint8_t c1 = ((uint8_t *)buf1)[i]; int dump_len; if (c == c1) continue; ubi_err(ubi, "self-check failed for PEB %d:%d, len %d", pnum, offset, len); ubi_msg(ubi, "data differ at position %d", i); dump_len = max_t(int, 128, len - i); ubi_msg(ubi, "hex dump of the original buffer from %d to %d", i, i + dump_len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf + i, dump_len, 1); ubi_msg(ubi, "hex dump of the read buffer from %d to %d", i, i + dump_len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf1 + i, dump_len, 1); dump_stack(); err = -EINVAL; goto out_free; } vfree(buf1); return 0; out_free: vfree(buf1); return err; } /** * ubi_self_check_all_ff - check that a region of flash is empty. * @ubi: UBI device description object * @pnum: the physical eraseblock number to check * @offset: the starting offset within the physical eraseblock to check * @len: the length of the region to check * * This function returns zero if only 0xFF bytes are present at offset * @offset of the physical eraseblock @pnum, and a negative error code if not * or if an error occurred. */ int ubi_self_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) { size_t read; int err; void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; if (!ubi_dbg_chk_io(ubi)) return 0; buf = __vmalloc(len, GFP_NOFS); if (!buf) { ubi_err(ubi, "cannot allocate memory to check for 0xFFs"); return 0; } err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && !mtd_is_bitflip(err)) { ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", err, len, pnum, offset, read); goto error; } err = ubi_check_pattern(buf, 0xFF, len); if (err == 0) { ubi_err(ubi, "flash region at PEB %d:%d, length %d does not contain all 0xFF bytes", pnum, offset, len); goto fail; } vfree(buf); return 0; fail: ubi_err(ubi, "self-check failed for PEB %d", pnum); ubi_msg(ubi, "hex dump of the %d-%d region", offset, offset + len); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); err = -EINVAL; error: dump_stack(); vfree(buf); return err; }
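/*
 * Illustrative sketch (not part of the driver): how a caller is expected to
 * consume the ubi_io_read() return-code contract documented above. The helper
 * name read_and_maybe_scrub() and the schedule_scrub() hook are hypothetical;
 * only ubi_io_read() and its return codes come from this file.
 */
static int read_and_maybe_scrub(struct ubi_device *ubi, void *buf,
				int pnum, int offset, int len)
{
	int err;

	err = ubi_io_read(ubi, buf, pnum, offset, len);
	if (err == UBI_IO_BITFLIPS) {
		/*
		 * The data is valid, but the PEB accumulated correctable
		 * bit-flips: use the data and queue the PEB for scrubbing
		 * (hypothetical hook).
		 */
		schedule_scrub(ubi, pnum);
		return 0;
	}
	if (err == -EBADMSG) {
		/*
		 * Uncorrectable ECC error: the data may be corrupted, so
		 * the caller must fall back to CRC checking of the payload.
		 */
		return err;
	}
	return err;	/* 0 on success, other negative codes on failure */
}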
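/*
 * Illustrative sketch (not part of the driver): the bounded retry-with-yield
 * idiom that ubi_io_read() and do_sync_erase() above both use.
 * do_one_io_attempt() is a hypothetical stand-in for an MTD operation that
 * can fail transiently; UBI_IO_RETRIES bounds the attempts and yield() gives
 * other threads a chance between them.
 */
static int do_io_with_retries(struct ubi_device *ubi, int pnum)
{
	int err, retries = 0;

retry:
	err = do_one_io_attempt(ubi, pnum);	/* hypothetical helper */
	if (err) {
		if (retries++ < UBI_IO_RETRIES) {
			yield();
			goto retry;
		}
		/* Permanent failure after UBI_IO_RETRIES attempts. */
		return err;
	}
	return 0;
}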
// SPDX-License-Identifier: GPL-2.0 #include <linux/anon_inodes.h> #include <linux/exportfs.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/cgroup.h> #include <linux/magic.h> #include <linux/mount.h> #include <linux/pid.h> #include <linux/pidfs.h> #include <linux/pid_namespace.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> #include <linux/ipc_namespace.h> #include <linux/time_namespace.h> #include <linux/utsname.h> #include <net/net_namespace.h> #include <linux/coredump.h> #include "internal.h" #include "mount.h" static struct kmem_cache *pidfs_cachep __ro_after_init; /* * Stashes information that userspace needs to access even after the * process has been reaped. */ struct pidfs_exit_info { __u64 cgroupid; __s32 exit_code; __u32 coredump_mask; }; struct pidfs_inode { struct pidfs_exit_info __pei; struct pidfs_exit_info *exit_info; struct inode vfs_inode; }; static inline struct pidfs_inode *pidfs_i(struct inode *inode) { return container_of(inode, struct pidfs_inode, vfs_inode); } static struct rb_root pidfs_ino_tree = RB_ROOT; #if BITS_PER_LONG == 32 static inline unsigned long pidfs_ino(u64 ino) { return lower_32_bits(ino); } /* On 32 bit the generation number is the upper 32 bits. */ static inline u32 pidfs_gen(u64 ino) { return upper_32_bits(ino); } #else /* On 64 bit simply return ino. */ static inline unsigned long pidfs_ino(u64 ino) { return ino; } /* On 64 bit the generation number is 0. */ static inline u32 pidfs_gen(u64 ino) { return 0; } #endif static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b) { struct pid *pid_a = rb_entry(a, struct pid, pidfs_node); struct pid *pid_b = rb_entry(b, struct pid, pidfs_node); u64 pid_ino_a = pid_a->ino; u64 pid_ino_b = pid_b->ino; if (pid_ino_a < pid_ino_b) return -1; if (pid_ino_a > pid_ino_b) return 1; return 0; } void pidfs_add_pid(struct pid *pid) { static u64 pidfs_ino_nr = 2; /* * On 64 bit nothing special happens. The 64-bit number assigned * to struct pid is the inode number. * * On 32 bit the 64-bit number assigned to struct pid is split * into two 32 bit numbers. The lower 32 bits are used as the * inode number and the upper 32 bits are used as the inode * generation number. * * On 32 bit pidfs_ino() will return the lower 32 bits. When * pidfs_ino() returns zero a wraparound happened. When a * wraparound happens the 64 bit number will be incremented by 2 * so inode numbering starts at 2 again. * * On 64 bit comparing two pidfds is as simple as comparing * inode numbers. * * When a wraparound happens on 32 bit multiple pidfds with the * same inode number are likely to exist (this isn't a problem * since, before pidfs, pidfds used the anonymous inode, meaning * all pidfds had the same inode number). Userspace can * reconstruct the 64 bit identifier by retrieving both the * inode number and the inode generation number to compare or * use file handles.
*/ if (pidfs_ino(pidfs_ino_nr) == 0) pidfs_ino_nr += 2; pid->ino = pidfs_ino_nr; pid->stashed = NULL; pidfs_ino_nr++; write_seqcount_begin(&pidmap_lock_seq); rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp); write_seqcount_end(&pidmap_lock_seq); } void pidfs_remove_pid(struct pid *pid) { write_seqcount_begin(&pidmap_lock_seq); rb_erase(&pid->pidfs_node, &pidfs_ino_tree); write_seqcount_end(&pidmap_lock_seq); } #ifdef CONFIG_PROC_FS /** * pidfd_show_fdinfo - print information about a pidfd * @m: proc fdinfo file * @f: file referencing a pidfd * * Pid: * This function will print the pid that a given pidfd refers to in the * pid namespace of the procfs instance. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance, 0 will be shown as its pid. This is * similar to calling getppid() on a process whose parent is outside of * its pid namespace. * * NSpid: * If pid namespaces are supported then this function will also print * the pids a given pidfd refers to for all descendant pid namespaces * starting from the current pid namespace of the instance, i.e. the * Pid field and the first entry in the NSpid field will be identical. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance, 0 will be shown as its first NSpid * entry and no others will be shown. * Note that this differs from the Pid and NSpid fields in * /proc/<pid>/status where Pid and NSpid are always shown relative to * the pid namespace of the procfs instance. The difference becomes * obvious when sending around a pidfd between pid namespaces from a * different branch of the tree, i.e. where no ancestral relation is * present between the pid namespaces: * - create two new pid namespaces ns1 and ns2 in the initial pid * namespace (also take care to create new mount namespaces in the * new pid namespace and mount procfs) * - create a process with a pidfd in ns1 * - send pidfd from ns1 to ns2 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid * have exactly one entry, which is 0 */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { struct pid *pid = pidfd_pid(f); struct pid_namespace *ns; pid_t nr = -1; if (likely(pid_has_task(pid, PIDTYPE_PID))) { ns = proc_pid_ns(file_inode(m->file)->i_sb); nr = pid_nr_ns(pid, ns); } seq_put_decimal_ll(m, "Pid:\t", nr); #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { int i; /* If nr is non-zero it means that 'pid' is valid and that * ns, i.e. the pid namespace associated with the procfs * instance, is in the pid namespace hierarchy of pid. * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n'); } #endif /* * Poll support for process exit notification. */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = pidfd_pid(file); struct task_struct *task; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); /* * Don't wake waiters if the thread-group leader exited * prematurely. They either get notified when the last subthread * exits or not at all if one of the remaining subthreads execs * and assumes the struct pid of the old thread-group leader.
*/ guard(rcu)(); task = pid_task(pid, PIDTYPE_PID); if (!task) poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; else if (task->exit_state && !delay_group_leader(task)) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; } static inline bool pid_in_current_pidns(const struct pid *pid) { const struct pid_namespace *ns = task_active_pid_ns(current); if (ns->level <= pid->level) return pid->numbers[ns->level].ns == ns; return false; } static __u32 pidfs_coredump_mask(unsigned long mm_flags) { switch (__get_dumpable(mm_flags)) { case SUID_DUMP_USER: return PIDFD_COREDUMP_USER; case SUID_DUMP_ROOT: return PIDFD_COREDUMP_ROOT; case SUID_DUMP_DISABLE: return PIDFD_COREDUMP_SKIP; default: WARN_ON_ONCE(true); } return 0; } static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; struct inode *inode = file_inode(file); struct pid *pid = pidfd_pid(file); size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; struct pidfs_exit_info *exit_info; struct user_namespace *user_ns; struct task_struct *task; const struct cred *c; __u64 mask; if (!uinfo) return -EINVAL; if (usize < PIDFD_INFO_SIZE_VER0) return -EINVAL; /* First version, no smaller struct possible */ if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) return -EFAULT; /* * Restrict information retrieval to tasks within the caller's pid * namespace hierarchy. */ if (!pid_in_current_pidns(pid)) return -ESRCH; if (mask & PIDFD_INFO_EXIT) { exit_info = READ_ONCE(pidfs_i(inode)->exit_info); if (exit_info) { kinfo.mask |= PIDFD_INFO_EXIT; #ifdef CONFIG_CGROUPS kinfo.cgroupid = exit_info->cgroupid; kinfo.mask |= PIDFD_INFO_CGROUPID; #endif kinfo.exit_code = exit_info->exit_code; } } if (mask & PIDFD_INFO_COREDUMP) { kinfo.mask |= PIDFD_INFO_COREDUMP; kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask); } task = get_pid_task(pid, PIDTYPE_PID); if (!task) { /* * If the task has already been reaped, only exit * information is available */ if (!(mask & PIDFD_INFO_EXIT)) return -ESRCH; goto copy_out; } c = get_task_cred(task); if (!c) return -ESRCH; if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) { task_lock(task); if (task->mm) kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags); task_unlock(task); } /* Unconditionally return identifiers and credentials, the rest only on request */ user_ns = current_user_ns(); kinfo.ruid = from_kuid_munged(user_ns, c->uid); kinfo.rgid = from_kgid_munged(user_ns, c->gid); kinfo.euid = from_kuid_munged(user_ns, c->euid); kinfo.egid = from_kgid_munged(user_ns, c->egid); kinfo.suid = from_kuid_munged(user_ns, c->suid); kinfo.sgid = from_kgid_munged(user_ns, c->sgid); kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); kinfo.mask |= PIDFD_INFO_CREDS; put_cred(c); #ifdef CONFIG_CGROUPS if (!kinfo.cgroupid) { struct cgroup *cgrp; rcu_read_lock(); cgrp = task_dfl_cgroup(task); kinfo.cgroupid = cgroup_id(cgrp); kinfo.mask |= PIDFD_INFO_CGROUPID; rcu_read_unlock(); } #endif /* * Copy pid/tgid last, to reduce the chances the information might be * stale. Note that it is not possible to ensure it will be valid as the * task might return as soon as the copy_to_user finishes, but that's ok * and userspace expects that might happen and can act accordingly, so * this is just best-effort. What we can do however is checking that all * the fields are set correctly, or return ESRCH to avoid providing * incomplete information. 
*/ kinfo.ppid = task_ppid_nr_ns(task, NULL); kinfo.tgid = task_tgid_vnr(task); kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: /* * If userspace and the kernel have the same struct size it can just * be copied. If userspace provides an older struct, only the bits that * userspace knows about will be copied. If userspace provides a new * struct, only the bits that the kernel knows about will be copied. */ return copy_struct_to_user(uinfo, usize, &kinfo, sizeof(kinfo), NULL); } static bool pidfs_ioctl_valid(unsigned int cmd) { switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: case PIDFD_GET_TIME_NAMESPACE: case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: case PIDFD_GET_UTS_NAMESPACE: case PIDFD_GET_USER_NAMESPACE: case PIDFD_GET_PID_NAMESPACE: return true; } /* Extensible ioctls require some more careful checks. */ switch (_IOC_NR(cmd)) { case _IOC_NR(PIDFD_GET_INFO): /* * Try to prevent performing a pidfd ioctl when someone * erroneously mistook the file descriptor for a pidfd. * This is not perfect but will catch most cases. */ return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); } return false; } static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; struct nsproxy *nsp __free(put_nsproxy) = NULL; struct ns_common *ns_common = NULL; struct pid_namespace *pid_ns; if (!pidfs_ioctl_valid(cmd)) return -ENOIOCTLCMD; if (cmd == FS_IOC_GETVERSION) { if (!arg) return -EINVAL; __u32 __user *argp = (__u32 __user *)arg; return put_user(file_inode(file)->i_generation, argp); } /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) return pidfd_info(file, cmd, arg); task = get_pid_task(pidfd_pid(file), PIDTYPE_PID); if (!task) return -ESRCH; if (arg) return -EINVAL; scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) get_nsproxy(nsp); } if (!nsp) return -ESRCH; /* just pretend it didn't exist */ /* * We're trying to open a file descriptor to the namespace so perform a * filesystem cred ptrace check. Also, we mirror nsfs behavior. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) return -EACCES; switch (cmd) { /* Namespaces that hang off nsproxy.
*/ case PIDFD_GET_CGROUP_NAMESPACE: if (IS_ENABLED(CONFIG_CGROUPS)) { get_cgroup_ns(nsp->cgroup_ns); ns_common = to_ns_common(nsp->cgroup_ns); } break; case PIDFD_GET_IPC_NAMESPACE: if (IS_ENABLED(CONFIG_IPC_NS)) { get_ipc_ns(nsp->ipc_ns); ns_common = to_ns_common(nsp->ipc_ns); } break; case PIDFD_GET_MNT_NAMESPACE: get_mnt_ns(nsp->mnt_ns); ns_common = to_ns_common(nsp->mnt_ns); break; case PIDFD_GET_NET_NAMESPACE: if (IS_ENABLED(CONFIG_NET_NS)) { ns_common = to_ns_common(nsp->net_ns); get_net_ns(ns_common); } break; case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { get_pid_ns(nsp->pid_ns_for_children); ns_common = to_ns_common(nsp->pid_ns_for_children); } break; case PIDFD_GET_TIME_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns); ns_common = to_ns_common(nsp->time_ns); } break; case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns_for_children); ns_common = to_ns_common(nsp->time_ns_for_children); } break; case PIDFD_GET_UTS_NAMESPACE: if (IS_ENABLED(CONFIG_UTS_NS)) { get_uts_ns(nsp->uts_ns); ns_common = to_ns_common(nsp->uts_ns); } break; /* Namespaces that don't hang off nsproxy. */ case PIDFD_GET_USER_NAMESPACE: if (IS_ENABLED(CONFIG_USER_NS)) { rcu_read_lock(); ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); rcu_read_unlock(); } break; case PIDFD_GET_PID_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { rcu_read_lock(); pid_ns = task_active_pid_ns(task); if (pid_ns) ns_common = to_ns_common(get_pid_ns(pid_ns)); rcu_read_unlock(); } break; default: return -ENOIOCTLCMD; } if (!ns_common) return -EOPNOTSUPP; /* open_namespace() unconditionally consumes the reference */ return open_namespace(ns_common); } static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif .unlocked_ioctl = pidfd_ioctl, .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) { if (file->f_op != &pidfs_file_operations) return ERR_PTR(-EBADF); return file_inode(file)->i_private; } /* * We're called from release_task(). We know there's at least one * reference to struct pid being held that won't be released until the * task has been reaped, which cannot happen until we're out of * release_task(). * * If this struct pid is referred to by a pidfd then * stashed_dentry_get() will return the dentry and inode for that struct * pid. Since we've taken a reference on it there's now an additional * reference from the exit path on it. Which is fine. We're going to put * it again in a second and we know that the pid is kept alive anyway. * * Worst case is that we've filled in the info and immediately free the * dentry and inode afterwards since the pidfd has been closed. Since * pidfs_exit() currently is placed after exit_task_work() we know that * it cannot be us aka the exiting task holding a pidfd to ourselves. */ void pidfs_exit(struct task_struct *tsk) { struct dentry *dentry; might_sleep(); dentry = stashed_dentry_get(&task_pid(tsk)->stashed); if (dentry) { struct inode *inode = d_inode(dentry); struct pidfs_exit_info *exit_info = &pidfs_i(inode)->__pei; #ifdef CONFIG_CGROUPS struct cgroup *cgrp; rcu_read_lock(); cgrp = task_dfl_cgroup(tsk); exit_info->cgroupid = cgroup_id(cgrp); rcu_read_unlock(); #endif exit_info->exit_code = tsk->exit_code; /* Ensure that PIDFD_GET_INFO sees either all or nothing.
*/ smp_store_release(&pidfs_i(inode)->exit_info, &pidfs_i(inode)->__pei); dput(dentry); } } #ifdef CONFIG_COREDUMP void pidfs_coredump(const struct coredump_params *cprm) { struct pid *pid = cprm->pid; struct pidfs_exit_info *exit_info; struct dentry *dentry; struct inode *inode; __u32 coredump_mask = 0; dentry = pid->stashed; if (WARN_ON_ONCE(!dentry)) return; inode = d_inode(dentry); exit_info = &pidfs_i(inode)->__pei; /* Note how we were coredumped. */ coredump_mask = pidfs_coredump_mask(cprm->mm_flags); /* Note that we actually did coredump. */ coredump_mask |= PIDFD_COREDUMPED; /* If coredumping is set to skip we should never end up here. */ VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP); smp_store_release(&exit_info->coredump_mask, coredump_mask); } #endif static struct vfsmount *pidfs_mnt __ro_after_init; /* * The vfs falls back to simple_setattr() if i_op->setattr() isn't * implemented. Let's reject it completely until we have a clean * permission concept for pidfds. */ static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return anon_inode_setattr(idmap, dentry, attr); } static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); } static const struct inode_operations pidfs_inode_operations = { .getattr = pidfs_getattr, .setattr = pidfs_setattr, }; static void pidfs_evict_inode(struct inode *inode) { struct pid *pid = inode->i_private; clear_inode(inode); put_pid(pid); } static struct inode *pidfs_alloc_inode(struct super_block *sb) { struct pidfs_inode *pi; pi = alloc_inode_sb(sb, pidfs_cachep, GFP_KERNEL); if (!pi) return NULL; memset(&pi->__pei, 0, sizeof(pi->__pei)); pi->exit_info = NULL; return &pi->vfs_inode; } static void pidfs_free_inode(struct inode *inode) { kmem_cache_free(pidfs_cachep, pidfs_i(inode)); } static const struct super_operations pidfs_sops = { .alloc_inode = pidfs_alloc_inode, .drop_inode = generic_delete_inode, .evict_inode = pidfs_evict_inode, .free_inode = pidfs_free_inode, .statfs = simple_statfs, }; /* * 'lsof' has knowledge of our historical anon_inode use, and expects * the pidfs dentry name to start with 'anon_inode'. */ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]"); } const struct dentry_operations pidfs_dentry_operations = { .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, struct inode *parent) { const struct pid *pid = inode->i_private; if (*max_len < 2) { *max_len = 2; return FILEID_INVALID; } *max_len = 2; *(u64 *)fh = pid->ino; return FILEID_KERNFS; } static int pidfs_ino_find(const void *key, const struct rb_node *node) { const u64 pid_ino = *(u64 *)key; const struct pid *pid = rb_entry(node, struct pid, pidfs_node); if (pid_ino < pid->ino) return -1; if (pid_ino > pid->ino) return 1; return 0; } /* Find a struct pid based on the inode number. */ static struct pid *pidfs_ino_get_pid(u64 ino) { struct pid *pid; struct rb_node *node; unsigned int seq; guard(rcu)(); do { seq = read_seqcount_begin(&pidmap_lock_seq); node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find); if (node) break; } while (read_seqcount_retry(&pidmap_lock_seq, seq)); if (!node) return NULL; pid = rb_entry(node, struct pid, pidfs_node); /* Within our pid namespace hierarchy?
*/ if (pid_vnr(pid) == 0) return NULL; return get_pid(pid); } static struct dentry *pidfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { int ret; u64 pid_ino; struct path path; struct pid *pid; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_KERNFS: pid_ino = *(u64 *)fid; break; default: return NULL; } pid = pidfs_ino_get_pid(pid_ino); if (!pid) return NULL; ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); if (ret < 0) return ERR_PTR(ret); mntput(path.mnt); return path.dentry; } /* * Make sure that we reject any nonsensical flags that users pass via * open_by_handle_at(). Note that PIDFD_THREAD is defined as O_EXCL, and * PIDFD_NONBLOCK as O_NONBLOCK. */ #define VALID_FILE_HANDLE_OPEN_FLAGS \ (O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL) static int pidfs_export_permission(struct handle_to_path_ctx *ctx, unsigned int oflags) { if (oflags & ~(VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE)) return -EINVAL; /* * pidfs_ino_get_pid() will verify that the struct pid is part * of the caller's pid namespace hierarchy. No further * permission checks are needed. */ return 0; } static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path, unsigned int flags) { enum pid_type type; if (flags & PIDFD_STALE) return true; /* * Make sure that if a pidfd is created PIDFD_INFO_EXIT * information will be available. So after an inode for the * pidfd has been allocated, perform another check that the pid * is still alive. If it is, exit information is available even * if the task gets reaped before the pidfd is returned to * userspace. The only exceptions are indicated by PIDFD_STALE: * * (1) The kernel is in the middle of task creation and thus no * task linkage has been established yet. * (2) The caller knows @pid has been registered in pidfs at a * time when the task was still alive. * * In both cases exit information will have been reported. */ if (flags & PIDFD_THREAD) type = PIDTYPE_PID; else type = PIDTYPE_TGID; /* * Since pidfs_exit() is called before struct pid's task linkage * is removed, the case where the task got reaped but a dentry * was already attached to struct pid and exit information was * recorded and published can be handled correctly. */ if (unlikely(!pid_has_task(pid, type))) { struct inode *inode = d_inode(path->dentry); return !!READ_ONCE(pidfs_i(inode)->exit_info); } return true; } static struct file *pidfs_export_open(struct path *path, unsigned int oflags) { if (!pidfs_pid_valid(d_inode(path->dentry)->i_private, path, oflags)) return ERR_PTR(-ESRCH); /* * Clear O_LARGEFILE as open_by_handle_at() forces it and raise * O_RDWR as pidfds always are.
*/ oflags &= ~O_LARGEFILE; return dentry_open(path, oflags | O_RDWR, current_cred()); } static const struct export_operations pidfs_export_operations = { .encode_fh = pidfs_encode_fh, .fh_to_dentry = pidfs_fh_to_dentry, .open = pidfs_export_open, .permission = pidfs_export_permission, }; static int pidfs_init_inode(struct inode *inode, void *data) { const struct pid *pid = data; inode->i_private = data; inode->i_flags |= S_PRIVATE | S_ANON_INODE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; inode->i_ino = pidfs_ino(pid->ino); inode->i_generation = pidfs_gen(pid->ino); return 0; } static void pidfs_put_data(void *data) { struct pid *pid = data; put_pid(pid); } static const struct stashed_operations pidfs_stashed_ops = { .init_inode = pidfs_init_inode, .put_data = pidfs_put_data, }; static int pidfs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx; ctx = init_pseudo(fc, PID_FS_MAGIC); if (!ctx) return -ENOMEM; ctx->ops = &pidfs_sops; ctx->eops = &pidfs_export_operations; ctx->dops = &pidfs_dentry_operations; fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0; } static struct file_system_type pidfs_type = { .name = "pidfs", .init_fs_context = pidfs_init_fs_context, .kill_sb = kill_anon_super, }; struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) { struct file *pidfd_file; struct path path __free(path_put) = {}; int ret; /* * Ensure that PIDFD_STALE can be passed as a flag without * overloading other uapi pidfd flags. */ BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD); BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK); ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); if (ret < 0) return ERR_PTR(ret); if (!pidfs_pid_valid(pid, &path, flags)) return ERR_PTR(-ESRCH); flags &= ~PIDFD_STALE; flags |= O_RDWR; pidfd_file = dentry_open(&path, flags, current_cred()); /* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */ if (!IS_ERR(pidfd_file)) pidfd_file->f_flags |= (flags & PIDFD_THREAD); return pidfd_file; } /** * pidfs_register_pid - register a struct pid in pidfs * @pid: pid to pin * * Register a struct pid in pidfs. Needs to be paired with * pidfs_put_pid() to not risk leaking the pidfs dentry and inode. * * Return: On success zero, on error a negative error code is returned. */ int pidfs_register_pid(struct pid *pid) { struct path path __free(path_put) = {}; int ret; might_sleep(); if (!pid) return 0; ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); if (unlikely(ret)) return ret; /* Keep the dentry and only put the reference to the mount. */ path.dentry = NULL; return 0; } /** * pidfs_get_pid - pin a struct pid through pidfs * @pid: pid to pin * * Similar to pidfs_register_pid() but only valid if the caller knows * there's a reference to the @pid through a dentry already that can't * go away. */ void pidfs_get_pid(struct pid *pid) { if (!pid) return; WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed)); } /** * pidfs_put_pid - drop a pidfs reference * @pid: pid to drop * * Drop a reference to @pid via pidfs. This is only safe if the * reference has been taken via pidfs_get_pid(). 
*/ void pidfs_put_pid(struct pid *pid) { might_sleep(); if (!pid) return; VFS_WARN_ON_ONCE(!pid->stashed); dput(pid->stashed); } static void pidfs_inode_init_once(void *data) { struct pidfs_inode *pi = data; inode_init_once(&pi->vfs_inode); } void __init pidfs_init(void) { pidfs_cachep = kmem_cache_create("pidfs_cache", sizeof(struct pidfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT | SLAB_PANIC), pidfs_inode_init_once); pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); }
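/*
 * Editor's sketch (not part of fs/pidfs.c): how a caller is expected to
 * pair pidfs_register_pid() with pidfs_put_pid() so the stashed pidfs
 * dentry and inode are not leaked. The helper name and the "use" step
 * are hypothetical.
 */
static int example_pin_pid_in_pidfs(struct pid *pid)
{
	int err;

	/* Pins the pidfs dentry/inode for @pid; may sleep. */
	err = pidfs_register_pid(pid);
	if (err)
		return err;

	/* ... hand @pid to another subsystem while it stays pinned ... */

	/* Balances the reference taken by pidfs_register_pid(). */
	pidfs_put_pid(pid);
	return 0;
}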
// SPDX-License-Identifier: GPL-2.0 or MIT /* * Copyright 2018 Noralf Trønnes */ #include <linux/list.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <drm/drm_client.h> #include <drm/drm_client_event.h> #include <drm/drm_debugfs.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_print.h> #include "drm_internal.h" /** * drm_client_dev_unregister - Unregister clients * @dev: DRM device * * This function releases all clients by calling each client's * &drm_client_funcs.unregister callback. The callback function * is responsible for releasing all resources including the client * itself. * * The helper drm_dev_unregister() calls this function. Drivers * that use it don't need to call this function themselves. */ void drm_client_dev_unregister(struct drm_device *dev) { struct drm_client_dev *client, *tmp; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; mutex_lock(&dev->clientlist_mutex); list_for_each_entry_safe(client, tmp, &dev->clientlist, list) { list_del(&client->list); if (client->funcs && client->funcs->unregister) { client->funcs->unregister(client); } else { drm_client_release(client); kfree(client); } } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_dev_unregister); static void drm_client_hotplug(struct drm_client_dev *client) { struct drm_device *dev = client->dev; int ret; if (!client->funcs || !client->funcs->hotplug) return; if (client->hotplug_failed) return; if (client->suspended) { client->hotplug_pending = true; return; } client->hotplug_pending = false; ret = client->funcs->hotplug(client); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); if (ret) client->hotplug_failed = true; } /** * drm_client_dev_hotplug - Send hotplug event to clients * @dev: DRM device * * This function calls the &drm_client_funcs.hotplug callback on the attached clients. * * drm_kms_helper_hotplug_event() calls this function, so drivers that use it * don't need to call this function themselves.
*/ void drm_client_dev_hotplug(struct drm_device *dev) { struct drm_client_dev *client; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; if (!dev->mode_config.num_connector) { drm_dbg_kms(dev, "No connectors found, will not send hotplug events!\n"); return; } mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) drm_client_hotplug(client); mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_dev_hotplug); void drm_client_dev_restore(struct drm_device *dev) { struct drm_client_dev *client; int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) { if (!client->funcs || !client->funcs->restore) continue; ret = client->funcs->restore(client); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); if (!ret) /* The first one to return zero gets the privilege to restore */ break; } mutex_unlock(&dev->clientlist_mutex); } static int drm_client_suspend(struct drm_client_dev *client, bool holds_console_lock) { struct drm_device *dev = client->dev; int ret = 0; if (drm_WARN_ON_ONCE(dev, client->suspended)) return 0; if (client->funcs && client->funcs->suspend) ret = client->funcs->suspend(client, holds_console_lock); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); client->suspended = true; return ret; } void drm_client_dev_suspend(struct drm_device *dev, bool holds_console_lock) { struct drm_client_dev *client; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) { if (!client->suspended) drm_client_suspend(client, holds_console_lock); } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_dev_suspend); static int drm_client_resume(struct drm_client_dev *client, bool holds_console_lock) { struct drm_device *dev = client->dev; int ret = 0; if (drm_WARN_ON_ONCE(dev, !client->suspended)) return 0; if (client->funcs && client->funcs->resume) ret = client->funcs->resume(client, holds_console_lock); drm_dbg_kms(dev, "%s: ret=%d\n", client->name, ret); client->suspended = false; if (client->hotplug_pending) drm_client_hotplug(client); return ret; } void drm_client_dev_resume(struct drm_device *dev, bool holds_console_lock) { struct drm_client_dev *client; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) { if (client->suspended) drm_client_resume(client, holds_console_lock); } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_dev_resume); #ifdef CONFIG_DEBUG_FS static int drm_client_debugfs_internal_clients(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_printer p = drm_seq_file_printer(m); struct drm_client_dev *client; mutex_lock(&dev->clientlist_mutex); list_for_each_entry(client, &dev->clientlist, list) drm_printf(&p, "%s\n", client->name); mutex_unlock(&dev->clientlist_mutex); return 0; } static const struct drm_debugfs_info drm_client_debugfs_list[] = { { "internal_clients", drm_client_debugfs_internal_clients, 0 }, }; void drm_client_debugfs_init(struct drm_device *dev) { drm_debugfs_add_files(dev, drm_client_debugfs_list, ARRAY_SIZE(drm_client_debugfs_list)); } #endif
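/*
 * Editor's sketch (not part of drm_client_event.c): the callbacks the
 * helpers above invoke are supplied via struct drm_client_funcs when a
 * client registers with a device. "example_hotplug" and the funcs table
 * are hypothetical; a real client (e.g. the fbdev emulation) would
 * probe connectors and set up a framebuffer in its hotplug hook.
 * THIS_MODULE comes from <linux/module.h>.
 */
static int example_hotplug(struct drm_client_dev *client)
{
	/* Returning non-zero sets client->hotplug_failed (see above). */
	return 0;
}

static const struct drm_client_funcs example_client_funcs = {
	.owner   = THIS_MODULE,
	.hotplug = example_hotplug,
};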
/* SPDX-License-Identifier: GPL-2.0 */ /* Rewritten and vastly simplified by Rusty Russell for in-kernel * module loader: * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ #ifndef _LINUX_KALLSYMS_H #define _LINUX_KALLSYMS_H #include <linux/errno.h> #include <linux/buildid.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/sections.h> #define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ (BUILD_ID_SIZE_MAX * 2) + 1) struct cred; struct module; static inline int is_kernel_text(unsigned long addr) { if (__is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (__is_kernel(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_ksym_addr(unsigned long addr) { if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) return is_kernel(addr); return is_kernel_text(addr) || is_kernel_inittext(addr); } static inline void *dereference_symbol_descriptor(void *ptr) { #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); if (is_ksym_addr((unsigned long)ptr)) return ptr; guard(rcu)(); mod = __module_address((unsigned long)ptr); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); #endif return ptr; } /* How and when do we show kallsyms values? */ extern bool kallsyms_show_value(const struct cred *cred); #ifdef CONFIG_KALLSYMS unsigned long kallsyms_sym_address(int idx); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned long), void *data); int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), const char *name, void *data); /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); /* Lookup an address. modname is set to NULL if it's in the kernel. */ const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); /* Look up a kernel symbol and return it in a text buffer.
*/ extern int sprint_symbol(char *buffer, unsigned long address); extern int sprint_symbol_build_id(char *buffer, unsigned long address); extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); extern int sprint_backtrace_build_id(char *buffer, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); #else /* !CONFIG_KALLSYMS */ static inline unsigned long kallsyms_lookup_name(const char *name) { return 0; } static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { return 0; } static inline const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int sprint_symbol(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_symbol_build_id(char *buffer, unsigned long address) { *buffer = '\0'; return 0; } static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace_build_id(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int lookup_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned long), void *data) { return -EOPNOTSUPP; } static inline int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), const char *name, void *data) { return -EOPNOTSUPP; } #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) { printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/
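/*
 * Editor's sketch (not part of this header): resolving an address into
 * the "symbol+offset/size [module]" form with sprint_symbol(). The
 * helper name is hypothetical; the buffer must be at least
 * KSYM_SYMBOL_LEN bytes. With !CONFIG_KALLSYMS the stub above simply
 * yields an empty string.
 */
static inline void example_report_addr(unsigned long addr)
{
	char sym[KSYM_SYMBOL_LEN];

	sprint_symbol(sym, addr);
	printk(KERN_DEBUG "addr %px resolves to %s\n", (void *)addr, sym);
}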
// SPDX-License-Identifier: GPL-2.0-only /* * lib/parser.c - simple parser for mount, etc. options. */ #include <linux/ctype.h> #include <linux/types.h> #include <linux/export.h> #include <linux/kstrtox.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/string.h> /* * max size needed by different bases to express U64 * HEX: "0xFFFFFFFFFFFFFFFF" --> 18 * DEC: "18446744073709551615" --> 20 * OCT: "01777777777777777777777" --> 23 * pick the max one to define NUMBER_BUF_LEN */ #define NUMBER_BUF_LEN 24 /** * match_one - Determines if a string matches a simple pattern * @s: the string to examine for presence of the pattern * @p: the string containing the pattern * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. * * Description: Determines if the pattern @p is present in string @s. Can only * match extremely simple token=arg style patterns. If the pattern is found, * the location(s) of the arguments will be returned in the @args array.
*/ static int match_one(char *s, const char *p, substring_t args[]) { char *meta; int argc = 0; if (!p) return 1; while(1) { int len = -1; meta = strchr(p, '%'); if (!meta) return strcmp(p, s) == 0; if (strncmp(p, s, meta-p)) return 0; s += meta - p; p = meta + 1; if (isdigit(*p)) len = simple_strtoul(p, (char **) &p, 10); else if (*p == '%') { if (*s++ != '%') return 0; p++; continue; } if (argc >= MAX_OPT_ARGS) return 0; args[argc].from = s; switch (*p++) { case 's': { size_t str_len = strlen(s); if (str_len == 0) return 0; if (len == -1 || len > str_len) len = str_len; args[argc].to = s + len; break; } case 'd': simple_strtol(s, &args[argc].to, 0); goto num; case 'u': simple_strtoul(s, &args[argc].to, 0); goto num; case 'o': simple_strtoul(s, &args[argc].to, 8); goto num; case 'x': simple_strtoul(s, &args[argc].to, 16); num: if (args[argc].to == args[argc].from) return 0; break; default: return 0; } s = args[argc].to; argc++; } } /** * match_token - Find a token (and optional args) in a string * @s: the string to examine for token/argument pairs * @table: match_table_t describing the set of allowed option tokens and the * arguments that may be associated with them. Must be terminated with a * &struct match_token whose pattern is set to the NULL pointer. * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match * locations. * * Description: Detects which if any of a set of token strings has been passed * to it. Tokens can include up to %MAX_OPT_ARGS instances of basic c-style * format identifiers which will be taken into account when matching the * tokens, and whose locations will be returned in the @args array. */ int match_token(char *s, const match_table_t table, substring_t args[]) { const struct match_token *p; for (p = table; !match_one(s, p->pattern, args) ; p++) ; return p->token; } EXPORT_SYMBOL(match_token); /** * match_number - scan a number in the given base from a substring_t * @s: substring to be scanned * @result: resulting integer on success * @base: base to use when converting string * * Description: Given a &substring_t and a base, attempts to parse the substring * as a number in that base. * * Return: On success, sets @result to the integer represented by the * string and returns 0. Returns -EINVAL or -ERANGE on failure. */ static int match_number(substring_t *s, int *result, int base) { char *endp; char buf[NUMBER_BUF_LEN]; int ret; long val; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; ret = 0; val = simple_strtol(buf, &endp, base); if (endp == buf) ret = -EINVAL; else if (val < (long)INT_MIN || val > (long)INT_MAX) ret = -ERANGE; else *result = (int) val; return ret; } /** * match_u64int - scan a number in the given base from a substring_t * @s: substring to be scanned * @result: resulting u64 on success * @base: base to use when converting string * * Description: Given a &substring_t and a base, attempts to parse the substring * as a number in that base. * * Return: On success, sets @result to the integer represented by the * string and returns 0. Returns -EINVAL or -ERANGE on failure. 
*/ static int match_u64int(substring_t *s, u64 *result, int base) { char buf[NUMBER_BUF_LEN]; int ret; u64 val; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; ret = kstrtoull(buf, base, &val); if (!ret) *result = val; return ret; } /** * match_int - scan a decimal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a decimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } EXPORT_SYMBOL(match_int); /** * match_uint - scan a decimal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a decimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_uint(substring_t *s, unsigned int *result) { char buf[NUMBER_BUF_LEN]; if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) return -ERANGE; return kstrtouint(buf, 10, result); } EXPORT_SYMBOL(match_uint); /** * match_u64 - scan a decimal representation of a u64 from * a substring_t * @s: substring_t to be scanned * @result: resulting unsigned long long on success * * Description: Attempts to parse the &substring_t @s as a long decimal * integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_u64(substring_t *s, u64 *result) { return match_u64int(s, result, 0); } EXPORT_SYMBOL(match_u64); /** * match_octal - scan an octal representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as an octal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } EXPORT_SYMBOL(match_octal); /** * match_hex - scan a hex representation of an integer from a substring_t * @s: substring_t to be scanned * @result: resulting integer on success * * Description: Attempts to parse the &substring_t @s as a hexadecimal integer. * * Return: On success, sets @result to the integer represented by the string * and returns 0. Returns -EINVAL or -ERANGE on failure. */ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } EXPORT_SYMBOL(match_hex); /** * match_wildcard - parse if a string matches given wildcard pattern * @pattern: wildcard pattern * @str: the string to be parsed * * Description: Parse the string @str to check if matches wildcard * pattern @pattern. The pattern may contain two types of wildcards: * * * '*' - matches zero or more characters * * '?' - matches one character * * Return: If the @str matches the @pattern, return true, else return false. 
*/ bool match_wildcard(const char *pattern, const char *str) { const char *s = str; const char *p = pattern; bool star = false; while (*s) { switch (*p) { case '?': s++; p++; break; case '*': star = true; str = s; if (!*++p) return true; pattern = p; break; default: if (*s == *p) { s++; p++; } else { if (!star) return false; str++; s = str; p = pattern; } break; } } if (*p == '*') ++p; return !*p; } EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy - Copy the characters from a substring_t to a sized buffer * @dest: where to copy to * @src: &substring_t to copy * @size: size of destination buffer * * Description: Copy the characters in &substring_t @src to the * c-style string @dest. Copy no more than @size - 1 characters, plus * the terminating NUL. * * Return: length of @src. */ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) { size_t ret = src->to - src->from; if (size) { size_t len = ret >= size ? size - 1 : ret; memcpy(dest, src->from, len); dest[len] = '\0'; } return ret; } EXPORT_SYMBOL(match_strlcpy); /** * match_strdup - allocate a new string with the contents of a substring_t * @s: &substring_t to copy * * Description: Allocates and returns a string filled with the contents of * the &substring_t @s. The caller is responsible for freeing the returned * string with kfree(). * * Return: the address of the newly allocated NUL-terminated string or * %NULL on error. */ char *match_strdup(const substring_t *s) { return kmemdup_nul(s->from, s->to - s->from, GFP_KERNEL); } EXPORT_SYMBOL(match_strdup);
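/*
 * Editor's sketch (not part of lib/parser.c): the typical way these
 * helpers are consumed when parsing mount-style options. The token
 * enum, table, and helper function are hypothetical.
 */
enum { Opt_mode, Opt_ro, Opt_err };

static const match_table_t example_tokens = {
	{ Opt_mode,	"mode=%o" },
	{ Opt_ro,	"ro" },
	{ Opt_err,	NULL }
};

static int example_parse_option(char *opt, int *mode, bool *ro)
{
	substring_t args[MAX_OPT_ARGS];

	switch (match_token(opt, example_tokens, args)) {
	case Opt_mode:
		/* "%o" captured an octal argument into args[0]. */
		return match_octal(&args[0], mode);
	case Opt_ro:
		*ro = true;
		return 0;
	default:
		return -EINVAL;
	}
}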
// SPDX-License-Identifier: GPL-2.0 /* * seq_buf.c * * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * * The seq_buf is a handy tool that allows you to pass a descriptor around * to a buffer that other functions can write to. It is similar to the * seq_file functionality but has some differences. * * To use it, the seq_buf must be initialized with seq_buf_init(). * This will set up the counters within the descriptor. You can call * seq_buf_init() more than once to reset the seq_buf to start * from scratch. */ #include <linux/bug.h> #include <linux/err.h> #include <linux/export.h> #include <linux/hex.h> #include <linux/minmax.h> #include <linux/printk.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/sprintf.h> #include <linux/string.h> #include <linux/types.h> #include <linux/uaccess.h> /** * seq_buf_can_fit - can the new data fit in the current buffer? * @s: the seq_buf descriptor * @len: The length to see if it can fit in the current buffer * * Returns: true if there's enough unused space in the seq_buf buffer * to fit the amount of new data according to @len. */ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) { return s->len + len <= s->size; } /** * seq_buf_print_seq - move the contents of seq_buf into a seq_file * @m: the seq_file descriptor that is the destination * @s: the seq_buf descriptor that is the source. * * Returns: zero on success, non-zero otherwise. */ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) { unsigned int len = seq_buf_used(s); return seq_write(m, s->buffer, len); } /** * seq_buf_vprintf - sequence printing of information. * @s: seq_buf descriptor * @fmt: printf format string * @args: va_list of arguments from a printf() type function * * Writes a vsnprintf() format into the sequence buffer. * * Returns: zero on success, -1 on overflow.
*/ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) { int len; WARN_ON(s->size == 0); if (s->len < s->size) { len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); if (s->len + len < s->size) { s->len += len; return 0; } } seq_buf_set_overflow(s); return -1; } /** * seq_buf_printf - sequence printing of information * @s: seq_buf descriptor * @fmt: printf format string * * Writes a printf() format into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = seq_buf_vprintf(s, fmt, ap); va_end(ap); return ret; } EXPORT_SYMBOL_GPL(seq_buf_printf); /** * seq_buf_do_printk - printk() seq_buf line by line * @s: seq_buf descriptor * @lvl: printk level * * printk()-s a multi-line sequential buffer line by line. The function * makes sure that the buffer in @s is NUL-terminated and safe to read * as a string. */ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) { const char *start, *lf; if (s->size == 0 || s->len == 0) return; start = seq_buf_str(s); while ((lf = strchr(start, '\n'))) { int len = lf - start + 1; printk("%s%.*s", lvl, len, start); start = ++lf; } /* No trailing LF */ if (start < s->buffer + s->len) printk("%s%s\n", lvl, start); } EXPORT_SYMBOL_GPL(seq_buf_do_printk); #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments * @s: seq_buf descriptor * @fmt: The format string for the @binary arguments * @binary: The binary arguments for @fmt. * * When recording in a fast path, a printf may be recorded with just * saving the format and the arguments as they were passed to the * function, instead of wasting cycles converting the arguments into * ASCII characters. Instead, the arguments are saved in a 32 bit * word array that is defined by the format string constraints. * * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) { unsigned int len = seq_buf_buffer_left(s); int ret; WARN_ON(s->size == 0); if (s->len < s->size) { ret = bstr_printf(s->buffer + s->len, len, fmt, binary); if (s->len + ret < s->size) { s->len += ret; return 0; } } seq_buf_set_overflow(s); return -1; } #endif /* CONFIG_BINARY_PRINTF */ /** * seq_buf_puts - sequence printing of simple string * @s: seq_buf descriptor * @str: simple string to record * * Copy a simple string into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_puts(struct seq_buf *s, const char *str) { size_t len = strlen(str); WARN_ON(s->size == 0); /* Add 1 to len for the trailing null byte which must be there */ len += 1; if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, str, len); /* Don't count the trailing null byte against the capacity */ s->len += len - 1; return 0; } seq_buf_set_overflow(s); return -1; } EXPORT_SYMBOL_GPL(seq_buf_puts); /** * seq_buf_putc - sequence printing of simple character * @s: seq_buf descriptor * @c: simple character to record * * Copy a single character into the sequence buffer. * * Returns: zero on success, -1 on overflow. 
*/ int seq_buf_putc(struct seq_buf *s, unsigned char c) { WARN_ON(s->size == 0); if (seq_buf_can_fit(s, 1)) { s->buffer[s->len++] = c; return 0; } seq_buf_set_overflow(s); return -1; } EXPORT_SYMBOL_GPL(seq_buf_putc); /** * seq_buf_putmem - write raw data into the sequence buffer * @s: seq_buf descriptor * @mem: The raw memory to copy into the buffer * @len: The length of the raw memory to copy (in bytes) * * There may be cases where raw memory needs to be written into the * buffer and a strcpy() would not work. Using this function allows * for such cases. * * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) { WARN_ON(s->size == 0); if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, mem, len); s->len += len; return 0; } seq_buf_set_overflow(s); return -1; } #define MAX_MEMHEX_BYTES 8U #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) /** * seq_buf_putmem_hex - write raw memory into the buffer in ASCII hex * @s: seq_buf descriptor * @mem: The raw memory to write its hex ASCII representation of * @len: The length of the raw memory to copy (in bytes) * * This is similar to seq_buf_putmem() except instead of just copying the * raw memory into the buffer it writes its ASCII representation of it * in hex characters. * * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len) { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; unsigned int start_len; int i, j; WARN_ON(s->size == 0); BUILD_BUG_ON(MAX_MEMHEX_BYTES * 2 >= HEX_CHARS); while (len) { start_len = min(len, MAX_MEMHEX_BYTES); #ifdef __BIG_ENDIAN for (i = 0, j = 0; i < start_len; i++) { #else for (i = start_len-1, j = 0; i >= 0; i--) { #endif hex[j++] = hex_asc_hi(data[i]); hex[j++] = hex_asc_lo(data[i]); } if (WARN_ON_ONCE(j == 0 || j/2 > len)) break; /* j increments twice per loop */ hex[j++] = ' '; seq_buf_putmem(s, hex, j); if (seq_buf_has_overflowed(s)) return -1; len -= start_len; data += start_len; } return 0; } /** * seq_buf_path - copy a path into the sequence buffer * @s: seq_buf descriptor * @path: path to write into the sequence buffer. * @esc: set of characters to escape in the output * * Write a path name into the sequence buffer. * * Returns: the number of written bytes on success, -1 on overflow. */ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) { char *buf; size_t size = seq_buf_get_buf(s, &buf); int res = -1; WARN_ON(s->size == 0); if (size) { char *p = d_path(path, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_buf_commit(s, res); return res; } /** * seq_buf_to_user - copy the sequence buffer to user space * @s: seq_buf descriptor * @ubuf: The userspace memory location to copy to * @start: The first byte in the buffer to copy * @cnt: The amount to copy * * Copies the sequence buffer into the userspace memory pointed to * by @ubuf. It starts from @start and writes up to @cnt characters * or until it reaches the end of the content in the buffer (@s->len), * whichever comes first. * * Returns: * On success, it returns a positive number of the number of bytes * it copied. * * On failure it returns -EBUSY if all of the content in the * sequence has been already read, which includes nothing in the * sequence (@s->len == @start). * * Returns -EFAULT if the copy to userspace fails. 
*/ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) { int len; int ret; if (!cnt) return 0; len = seq_buf_used(s); if (len <= start) return -EBUSY; len -= start; if (cnt > len) cnt = len; ret = copy_to_user(ubuf, s->buffer + start, cnt); if (ret == cnt) return -EFAULT; return cnt - ret; } /** * seq_buf_hex_dump - print formatted hex dump into the sequence buffer * @s: seq_buf descriptor * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @rowsize: number of bytes to print per line; must be 16 or 32 * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) * @buf: data blob to dump * @len: number of bytes in the @buf * @ascii: include ASCII after the hex output * * Function is an analogue of print_hex_dump() and thus has similar interface. * * linebuf size is maximal length for one line. * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for * separating space * 2 - spaces separating hex dump and ASCII representation * 32 - ASCII representation * 1 - terminating '\0' * * Returns: zero on success, -1 on overflow. */ int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; unsigned char linebuf[32 * 3 + 2 + 32 + 1]; int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: ret = seq_buf_printf(s, "%s%p: %s\n", prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: ret = seq_buf_printf(s, "%s%.8x: %s\n", prefix_str, i, linebuf); break; default: ret = seq_buf_printf(s, "%s%s\n", prefix_str, linebuf); break; } if (ret) return ret; } return 0; }
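/*
 * Editor's sketch (not part of lib/seq_buf.c): composing a string into
 * a stack-backed seq_buf. DECLARE_SEQ_BUF(), seq_buf_str() and
 * seq_buf_has_overflowed() come from <linux/seq_buf.h>; the function
 * name is hypothetical.
 */
static void example_seq_buf_usage(void)
{
	DECLARE_SEQ_BUF(s, 64);	/* seq_buf with a 64-byte backing buffer */

	seq_buf_printf(&s, "%d widgets", 3);
	seq_buf_putc(&s, '\n');

	/* seq_buf_str() NUL-terminates the contents before use. */
	if (!seq_buf_has_overflowed(&s))
		printk(KERN_DEBUG "%s", seq_buf_str(&s));
}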
// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ #include <linux/unaligned.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/sched/signal.h> #include <linux/unicode.h> #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" #include <trace/events/f2fs.h> #if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif static unsigned long dir_blocks(struct inode *inode) { return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1)) >> PAGE_SHIFT; } static unsigned int dir_buckets(unsigned int level, int dir_level) { if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return BIT(level + dir_level); else return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) { if (level < MAX_DIR_HASH_DEPTH / 2) return 2; else return 4; } #if IS_ENABLED(CONFIG_UNICODE) /* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname.
*/ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { struct super_block *sb = dir->i_sb; unsigned char *buf; int len; if (IS_CASEFOLDED(dir) && !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab, GFP_NOFS, false, F2FS_SB(sb)); if (!buf) return -ENOMEM; len = utf8_casefold(sb->s_encoding, fname->usr_fname, buf, F2FS_NAME_LEN); if (len <= 0) { kmem_cache_free(f2fs_cf_name_slab, buf); if (sb_has_strict_encoding(sb)) return -EINVAL; /* fall back to treating name as opaque byte sequence */ return 0; } fname->cf_name.name = buf; fname->cf_name.len = len; } return 0; } void f2fs_free_casefolded_name(struct f2fs_filename *fname) { unsigned char *buf = (unsigned char *)fname->cf_name.name; if (buf) { kmem_cache_free(f2fs_cf_name_slab, buf); fname->cf_name.name = NULL; } } #endif /* CONFIG_UNICODE */ static int __f2fs_setup_filename(const struct inode *dir, const struct fscrypt_name *crypt_name, struct f2fs_filename *fname) { int err; memset(fname, 0, sizeof(*fname)); fname->usr_fname = crypt_name->usr_fname; fname->disk_name = crypt_name->disk_name; #ifdef CONFIG_FS_ENCRYPTION fname->crypto_buf = crypt_name->crypto_buf; #endif if (crypt_name->is_nokey_name) { /* hash was decoded from the no-key name */ fname->hash = cpu_to_le32(crypt_name->hash); } else { err = f2fs_init_casefolded_name(dir, fname); if (err) { f2fs_free_filename(fname); return err; } f2fs_hash_filename(dir, fname); } return 0; } /* * Prepare to search for @iname in @dir. This is similar to * fscrypt_setup_filename(), but this also handles computing the casefolded name * and the f2fs dirhash if needed, then packing all the information about this * filename up into a 'struct f2fs_filename'. */ int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct f2fs_filename *fname) { struct fscrypt_name crypt_name; int err; err = fscrypt_setup_filename(dir, iname, lookup, &crypt_name); if (err) return err; return __f2fs_setup_filename(dir, &crypt_name, fname); } /* * Prepare to look up @dentry in @dir. This is similar to * fscrypt_prepare_lookup(), but this also handles computing the casefolded name * and the f2fs dirhash if needed, then packing all the information about this * filename up into a 'struct f2fs_filename'. 
*/ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname) { struct fscrypt_name crypt_name; int err; err = fscrypt_prepare_lookup(dir, dentry, &crypt_name); if (err) return err; return __f2fs_setup_filename(dir, &crypt_name, fname); } void f2fs_free_filename(struct f2fs_filename *fname) { #ifdef CONFIG_FS_ENCRYPTION kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif f2fs_free_casefolded_name(fname); } static unsigned long dir_block_index(unsigned int level, int dir_level, unsigned int idx) { unsigned long i; unsigned long bidx = 0; for (i = 0; i < level; i++) bidx += mul_u32_u32(dir_buckets(i, dir_level), bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct folio *dentry_folio, const struct f2fs_filename *fname, int *max_slots, bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, const u8 *de_name, u32 de_name_len) { struct fscrypt_name f; #if IS_ENABLED(CONFIG_UNICODE) if (fname->cf_name.name) return generic_ci_match(dir, fname->usr_fname, &fname->cf_name, de_name, de_name_len); #endif f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; #ifdef CONFIG_FS_ENCRYPTION f.crypto_buf = fname->crypto_buf; #endif return fscrypt_match_name(&f, de_name, de_name_len); } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, const struct f2fs_filename *fname, int *max_slots, bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; int res = 0; if (max_slots) *max_slots = 0; while (bit_pos < d->max) { if (!test_bit_le(bit_pos, d->bitmap)) { bit_pos++; max_len++; continue; } de = &d->dentry[bit_pos]; if (unlikely(!de->name_len)) { bit_pos++; continue; } if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); if (res < 0) return ERR_PTR(res); if (res) goto found; } if (max_slots && max_len > *max_slots) *max_slots = max_len; max_len = 0; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; found: if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, struct folio **res_folio, bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block, bucket_no; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; bool room = false; int max_slots; nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bucket_no = use_hash ? 
le32_to_cpu(fname->hash) % nbucket : 0; start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, bucket_no); end_block = bidx + nblock; while (bidx < end_block) { /* no need to allocate new dentry pages to all the indices */ struct folio *dentry_folio; dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); if (IS_ERR(dentry_folio)) { if (PTR_ERR(dentry_folio) == -ENOENT) { room = true; bidx = next_pgofs; continue; } else { *res_folio = dentry_folio; break; } } de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash); if (IS_ERR(de)) { *res_folio = ERR_CAST(de); de = NULL; break; } else if (de) { *res_folio = dentry_folio; break; } if (max_slots >= s) room = true; f2fs_folio_put(dentry_folio, false); bidx++; } if (de) return de; if (likely(use_hash)) { if (room && F2FS_I(dir)->chash != fname->hash) { F2FS_I(dir)->chash = fname->hash; F2FS_I(dir)->clevel = level; } } else if (++bucket_no < nbucket) { goto start_find_bucket; } return NULL; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, const struct f2fs_filename *fname, struct folio **res_folio) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; bool use_hash = true; *res_folio = NULL; #if IS_ENABLED(CONFIG_UNICODE) start_find_entry: #endif if (f2fs_has_inline_dentry(dir)) { de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash); goto out; } if (npages == 0) goto out; max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u", dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } for (level = 0; level < max_depth; level++) { de = find_in_level(dir, level, fname, res_folio, use_hash); if (de || IS_ERR(*res_folio)) break; } out: #if IS_ENABLED(CONFIG_UNICODE) if (!sb_no_casefold_compat_fallback(dir->i_sb) && IS_CASEFOLDED(dir) && !de && use_hash) { use_hash = false; goto start_find_entry; } #endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; return de; } /* * Find an entry in the specified directory with the wanted name. * It returns the page where the entry was found (as a parameter - res_page), * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct folio **res_folio) { struct f2fs_dir_entry *de = NULL; struct f2fs_filename fname; int err; err = f2fs_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) *res_folio = NULL; else *res_folio = ERR_PTR(err); return NULL; } de = __f2fs_find_entry(dir, &fname, res_folio); f2fs_free_filename(&fname); return de; } struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f) { return f2fs_find_entry(dir, &dotdot_name, f); } ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct folio **folio) { ino_t res = 0; struct f2fs_dir_entry *de; de = f2fs_find_entry(dir, qstr, folio); if (de) { res = le32_to_cpu(de->ino); f2fs_folio_put(*folio, false); } return res; } void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct folio *folio, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; folio_lock(folio); f2fs_folio_wait_writeback(folio, type, true, true); de->ino = cpu_to_le32(inode->i_ino); de->file_type = fs_umode_to_ftype(inode->i_mode); folio_mark_dirty(folio); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); f2fs_folio_put(folio, true); } static void init_dent_inode(struct inode *dir, struct inode *inode, const struct f2fs_filename *fname, struct folio *ifolio) { struct f2fs_inode *ri; if (!fname) /* tmpfile case? */ return; f2fs_folio_wait_writeback(ifolio, NODE, true, true); /* copy name info. to this inode folio */ ri = F2FS_INODE(&ifolio->page); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { file_set_enc_name(inode); /* * Roll-forward recovery doesn't have encryption keys available, * so it can't compute the dirhash for encrypted+casefolded * filenames. Append it to i_name if possible. Else, disable * roll-forward recovery of the dentry (i.e., make fsync'ing the * file force a checkpoint) by setting LOST_PINO. */ if (IS_CASEFOLDED(dir)) { if (fname->disk_name.len + sizeof(f2fs_hash_t) <= F2FS_NAME_LEN) put_unaligned(fname->hash, (f2fs_hash_t *) &ri->i_name[fname->disk_name.len]); else file_lost_pino(inode); } } folio_mark_dirty(ifolio); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { struct fscrypt_str dot = FSTR_INIT(".", 1); struct fscrypt_str dotdot = FSTR_INIT("..", 2); /* update dirent of "." */ f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0); /* update dirent of ".." */ f2fs_update_dentry(parent->i_ino, parent->i_mode, d, &dotdot, 0, 1); } static int make_empty_dir(struct inode *inode, struct inode *parent, struct folio *folio) { struct folio *dentry_folio; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) return f2fs_make_empty_inline_dir(inode, parent, folio); dentry_folio = f2fs_get_new_data_folio(inode, folio, 0, true); if (IS_ERR(dentry_folio)) return PTR_ERR(dentry_folio); dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_do_make_empty_dir(inode, parent, &d); folio_mark_dirty(dentry_folio); f2fs_folio_put(dentry_folio, true); return 0; } struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, const struct f2fs_filename *fname, struct folio *dfolio) { struct folio *folio; int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { folio = f2fs_new_inode_folio(inode); if (IS_ERR(folio)) return folio; if (S_ISDIR(inode->i_mode)) { /* in order to handle error case */ folio_get(folio); err = make_empty_dir(inode, dir, folio); if (err) { folio_lock(folio); goto put_error; } folio_put(folio); } err = f2fs_init_acl(inode, dir, folio, dfolio); if (err) goto put_error; err = f2fs_init_security(inode, dir, fname ? fname->usr_fname : NULL, folio); if (err) goto put_error; if (IS_ENCRYPTED(inode)) { err = fscrypt_set_context(inode, folio); if (err) goto put_error; } } else { folio = f2fs_get_inode_folio(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(folio)) return folio; } init_dent_inode(dir, inode, fname, folio); /* * This file should be checkpointed during fsync. * We lost i_pino from now on. */ if (is_inode_flag_set(inode, FI_INC_LINK)) { if (!S_ISDIR(inode->i_mode)) file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, * we should remove this inode from orphan list. 
*/ if (inode->i_nlink == 0) f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } return folio; put_error: clear_nlink(inode); f2fs_update_inode(inode, folio); f2fs_folio_put(folio, true); return ERR_PTR(err); } void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, true); clear_inode_flag(inode, FI_NEW_INODE); } inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); if (inode && is_inode_flag_set(inode, FI_INC_LINK)) clear_inode_flag(inode, FI_INC_LINK); } int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); if (zero_start >= max_slots) return max_slots; zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; if (zero_end + 1 >= max_slots) return max_slots; goto next; } bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, const struct f2fs_filename *fname) { struct f2fs_dentry_ptr d; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(fname->disk_name.len); make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ifolio)); bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); return bit_pos < d.max; } void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct fscrypt_str *name, f2fs_hash_t name_hash, unsigned int bit_pos) { struct f2fs_dir_entry *de; int slots = GET_DENTRY_SLOTS(name->len); int i; de = &d->dentry[bit_pos]; de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); de->ino = cpu_to_le32(ino); de->file_type = fs_umode_to_ftype(mode); for (i = 0; i < slots; i++) { __set_bit_le(bit_pos + i, (void *)d->bitmap); /* avoid wrong garbage data for readdir */ if (i) (de + i)->name_len = 0; } } int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; unsigned int nbucket, nblock; struct folio *dentry_folio = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; struct folio *folio = NULL; int slots, err = 0; level = 0; slots = GET_DENTRY_SLOTS(fname->disk_name.len); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == fname->hash) { level = F2FS_I(dir)->clevel; F2FS_I(dir)->chash = 0; } start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) return -ENOSPC; if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; /* Increase the depth, if required */ if (level == current_depth) ++current_depth; nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, (le32_to_cpu(fname->hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_folio = f2fs_get_new_data_folio(dir, NULL, block, true); if (IS_ERR(dentry_folio)) return PTR_ERR(dentry_folio); dentry_blk = folio_address(dentry_folio); bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto 
add_dentry; f2fs_folio_put(dentry_folio, true); } /* Move to next level to find the empty slot for new dentry */ ++level; goto start; add_dentry: f2fs_folio_wait_writeback(dentry_folio, DATA, true, true); if (inode) { f2fs_down_write(&F2FS_I(inode)->i_sem); folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; } } make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, bit_pos); folio_mark_dirty(dentry_folio); if (inode) { f2fs_i_pino_write(inode, dir->i_ino); /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) f2fs_update_inode(inode, folio); f2fs_folio_put(folio, true); } f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) f2fs_up_write(&F2FS_I(inode)->i_sem); f2fs_folio_put(dentry_folio, true); return err; } int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode) { int err = -EAGAIN; if (f2fs_has_inline_dentry(dir)) { /* * Should get i_xattr_sem to keep the lock order: * i_xattr_sem -> inode_page lock used by f2fs_setxattr. */ f2fs_down_read(&F2FS_I(dir)->i_xattr_sem); err = f2fs_add_inline_entry(dir, fname, inode, ino, mode); f2fs_up_read(&F2FS_I(dir)->i_xattr_sem); } if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, fname, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_filename fname; struct folio *folio = NULL; struct f2fs_dir_entry *de = NULL; int err; err = f2fs_setup_filename(dir, name, 0, &fname); if (err) return err; /* * An immature stackable filesystem can expose a race between lookup * and create. If the same task performs both the lookup and the * create, this is fine and matches what the VFS expects. Otherwise, * verify the on-disk dentry one more time, which better guarantees * filesystem consistency.
*/ if (current != F2FS_I(dir)->task) { de = __f2fs_find_entry(dir, &fname, &folio); F2FS_I(dir)->task = NULL; } if (de) { f2fs_folio_put(folio, false); err = -EEXIST; } else if (IS_ERR(folio)) { err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } f2fs_free_filename(&fname); return err; } int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, struct f2fs_filename *fname) { struct folio *folio; int err = 0; f2fs_down_write(&F2FS_I(inode)->i_sem); folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(folio)) { err = PTR_ERR(folio); goto fail; } f2fs_folio_put(folio, true); clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: f2fs_up_write(&F2FS_I(inode)->i_sem); return err; } void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); f2fs_down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); inode_set_ctime_current(inode); f2fs_i_links_write(inode, false); if (S_ISDIR(inode->i_mode)) { f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } f2fs_up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) f2fs_add_orphan_inode(inode); else f2fs_release_orphan_inode(sbi); } /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); pgoff_t index = folio->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, folio, dir, inode); folio_lock(folio); f2fs_folio_wait_writeback(folio, DATA, true, true); dentry_blk = folio_address(folio); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); folio_mark_dirty(folio); if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, index, index + 1)) { f2fs_clear_page_cache_dirty_tag(folio); folio_clear_dirty_for_io(folio); folio_clear_uptodate(folio); clear_page_private_all(&folio->page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } f2fs_folio_put(folio, true); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); if (inode) f2fs_drop_nlink(dir, inode); } bool f2fs_empty_dir(struct inode *dir) { unsigned long bidx = 0; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); if (f2fs_has_inline_dentry(dir)) return f2fs_empty_inline_dir(dir); while (bidx < nblock) { pgoff_t next_pgofs; struct folio *dentry_folio; dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); if (IS_ERR(dentry_folio)) { if (PTR_ERR(dentry_folio) == -ENOENT) { bidx = next_pgofs; continue; } else { return false; } } dentry_blk = folio_address(dentry_folio); if (bidx == 0) bit_pos = 2; else bit_pos = 0; bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); f2fs_folio_put(dentry_folio, false); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; bidx++; } 
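/* No in-use dentry was found in any block, so the directory is empty. */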
return true; } int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr) { unsigned char d_type = DT_UNKNOWN; unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); struct blk_plug plug; bool readdir_ra = sbi->readdir_ra; bool found_valid_dirent = false; int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); if (readdir_ra) blk_start_plug(&plug); while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) break; de = &d->dentry[bit_pos]; if (de->name_len == 0) { if (found_valid_dirent || !bit_pos) { f2fs_warn_ratelimited(sbi, "invalid namelen(0), ino:%u, run fsck to fix.", le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); } bit_pos++; ctx->pos = start_pos + bit_pos; continue; } d_type = fs_ftype_to_dtype(de->file_type); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); /* check memory boundary before moving forward */ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT); goto out; } if (IS_ENCRYPTED(d->inode)) { int save_len = fstr->len; err = fscrypt_fname_disk_to_usr(d->inode, (u32)le32_to_cpu(de->hash_code), 0, &de_name, fstr); if (err) goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, le32_to_cpu(de->ino), d_type)) { err = 1; goto out; } if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); ctx->pos = start_pos + bit_pos; found_valid_dirent = true; } out: if (readdir_ra) blk_finish_plug(&plug); return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); struct f2fs_dentry_block *dentry_blk = NULL; struct file_ra_state *ra = &file->f_ra; loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); int err = 0; if (IS_ENCRYPTED(inode)) { err = fscrypt_prepare_readdir(inode); if (err) goto out; err = fscrypt_fname_alloc_buffer(F2FS_NAME_LEN, &fstr); if (err < 0) goto out; } if (f2fs_has_inline_dentry(inode)) { err = f2fs_read_inline_dir(file, ctx, &fstr); goto out_free; } for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) { struct folio *dentry_folio; pgoff_t next_pgofs; /* allow readdir() to be interrupted */ if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_free; } cond_resched(); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); dentry_folio = f2fs_find_data_folio(inode, n, &next_pgofs); if (IS_ERR(dentry_folio)) { err = PTR_ERR(dentry_folio); if (err == -ENOENT) { err = 0; n = next_pgofs; continue; } else { goto out_free; } } dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); f2fs_folio_put(dentry_folio, false); if (err) break; n++; } out_free: fscrypt_fname_free_buffer(&fstr); out: trace_f2fs_readdir(inode, start_pos, ctx->pos, err); return err < 0 
? err : 0; } const struct file_operations f2fs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = f2fs_readdir, .fsync = f2fs_sync_file, .unlocked_ioctl = f2fs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = f2fs_compat_ioctl, #endif };
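The bitmap scan in f2fs_room_for_filename() above is the core of dentry placement: a name occupying GET_DENTRY_SLOTS(len) slots needs that many consecutive clear bits. Below is a minimal userspace sketch of the same scan, assuming a plain linear bit walk can stand in for the kernel's find_next_zero_bit_le()/find_next_bit_le() (little-endian handling omitted); room_for_filename() and find_next_bit_val() are illustrative names, not kernel APIs.

#include <stdio.h>

/*
 * Simplified stand-in for find_next_bit_le()/find_next_zero_bit_le():
 * return the first position >= start whose bit equals `want`, or max.
 */
static int find_next_bit_val(const unsigned char *bitmap, int max,
			     int start, int want)
{
	for (; start < max; start++)
		if (((bitmap[start / 8] >> (start % 8)) & 1) == want)
			return start;
	return max;
}

/* First position of a zero-bit run of length >= slots, or max if none. */
static int room_for_filename(const unsigned char *bitmap, int slots, int max)
{
	int bit_start = 0;

	while (bit_start < max) {
		int zero_start = find_next_bit_val(bitmap, max, bit_start, 0);
		int zero_end;

		if (zero_start >= max)
			return max;
		zero_end = find_next_bit_val(bitmap, max, zero_start, 1);
		if (zero_end - zero_start >= slots)
			return zero_start;
		bit_start = zero_end + 1;
	}
	return max;
}

int main(void)
{
	unsigned char bitmap[] = { 0x27 };	/* bits 0, 1, 2 and 5 in use */

	/* The first free run of two slots starts at bit 3. */
	printf("%d\n", room_for_filename(bitmap, 2, 8));
	return 0;
}

As in the kernel helper, the scan restarts just past each too-short gap (bit_start = zero_end + 1), so the cost stays linear in the bitmap size.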
// SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/dir.c - sysfs core and dir operation implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information. */ #define pr_fmt(fmt) "sysfs: " fmt #include <linux/fs.h> #include <linux/kobject.h> #include <linux/slab.h> #include "sysfs.h" DEFINE_SPINLOCK(sysfs_symlink_target_lock); void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { char *buf; buf = kzalloc(PATH_MAX, GFP_KERNEL); if (buf) kernfs_path(parent, buf, PATH_MAX); pr_warn("cannot create duplicate filename '%s/%s'\n", buf, name); dump_stack(); kfree(buf); } /** * sysfs_create_dir_ns - create a directory for an object with a namespace tag * @kobj: object we're creating directory for * @ns: the namespace tag to use */ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; kuid_t uid; kgid_t gid; if (WARN_ON(!kobj)) return -EINVAL; if (kobj->parent) parent = kobj->parent->sd; else parent = sysfs_root_kn; if (!parent) return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); kn = kernfs_create_dir_ns(parent, kobject_name(kobj), 0755, uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, kobject_name(kobj)); return PTR_ERR(kn); } kobj->sd = kn; return 0; } /** * sysfs_remove_dir - remove an object's directory. * @kobj: object. * * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ void sysfs_remove_dir(struct kobject *kobj) { struct kernfs_node *kn = kobj->sd; /* * In general, kobject owner is responsible for ensuring removal * doesn't race with other operations and sysfs doesn't provide any * protection; however, when @kobj is used as a symlink target, the * symlinking entity usually doesn't own @kobj and thus has no * control over removal. @kobj->sd may be removed anytime * and symlink code may end up dereferencing an already freed node. * * sysfs_symlink_target_lock synchronizes @kobj->sd * disassociation against symlink operations so that symlink code * can safely dereference @kobj->sd. */ spin_lock(&sysfs_symlink_target_lock); kobj->sd = NULL; spin_unlock(&sysfs_symlink_target_lock); if (kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); kernfs_remove(kn); } } int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { struct kernfs_node *parent; int ret; parent = kernfs_get_parent(kobj->sd); ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); kernfs_put(parent); return ret; } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns) { struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; new_parent = new_parent_kobj && new_parent_kobj->sd ?
new_parent_kobj->sd : sysfs_root_kn; return kernfs_rename_ns(kn, new_parent, NULL, new_ns); } /** * sysfs_create_mount_point - create an always empty directory * @parent_kobj: kobject that will contain this always empty directory * @name: The name of the always empty directory to add */ int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) { struct kernfs_node *kn, *parent = parent_kobj->sd; kn = kernfs_create_empty_dir(parent, name); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, name); return PTR_ERR(kn); } return 0; } EXPORT_SYMBOL_GPL(sysfs_create_mount_point); /** * sysfs_remove_mount_point - remove an always empty directory. * @parent_kobj: kobject that will contain this always empty directory * @name: The name of the always empty directory to remove * */ void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) { struct kernfs_node *parent = parent_kobj->sd; kernfs_remove_by_name_ns(parent, name, NULL); } EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
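For context, here is a minimal sketch of how the directory hooks above are normally exercised, assuming a trivial module is an acceptable vehicle: kobject_create_and_add() reaches sysfs_create_dir_ns() when registering the kobject, and the final kobject_put() reaches sysfs_remove_dir() on teardown. The name "example_dir" is arbitrary.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kobject.h>

static struct kobject *example_kobj;

static int __init example_init(void)
{
	/* Creates /sys/kernel/example_dir via sysfs_create_dir_ns(). */
	example_kobj = kobject_create_and_add("example_dir", kernel_kobj);
	if (!example_kobj)
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	/* Drops the last reference; the directory is removed during cleanup. */
	kobject_put(example_kobj);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Attributes would then be added under the same kobject with sysfs_create_file(); a duplicate name at the same level trips the sysfs_warn_dup() path shown above.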
// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J.
Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_refcount.h" #include "xfs_rmap.h" #include "xfs_ag.h" #include "xfs_health.h" #include "xfs_refcount_item.h" #include "xfs_rtgroup.h" #include "xfs_rtalloc.h" #include "xfs_rtrefcount_btree.h" struct kmem_cache *xfs_refcount_intent_cache; /* Allowable refcount adjustment amounts. */ enum xfs_refc_adjust_op { XFS_REFCOUNT_ADJUST_INCREASE = 1, XFS_REFCOUNT_ADJUST_DECREASE = -1, XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, XFS_REFCOUNT_ADJUST_COW_FREE = -1, }; STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t aglen); STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t aglen); /* * Look up the first record less than or equal to [bno, len] in the btree * given by cur. */ int xfs_refcount_lookup_le( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { trace_xfs_refcount_lookup(cur, xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } /* * Look up the first record greater than or equal to [bno, len] in the btree * given by cur. */ int xfs_refcount_lookup_ge( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { trace_xfs_refcount_lookup(cur, xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } /* * Look up the first record equal to [bno, len] in the btree * given by cur. */ int xfs_refcount_lookup_eq( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { trace_xfs_refcount_lookup(cur, xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } /* Convert on-disk record to in-core format. */ void xfs_refcount_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { uint32_t start; start = be32_to_cpu(rec->refc.rc_startblock); if (start & XFS_REFC_COWFLAG) { start &= ~XFS_REFC_COWFLAG; irec->rc_domain = XFS_REFC_DOMAIN_COW; } else { irec->rc_domain = XFS_REFC_DOMAIN_SHARED; } irec->rc_startblock = start; irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } /* Simple checks for refcount records. 
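* Each check returns the address of the failing test via __this_address so
 * corruption reports can pinpoint it, or NULL if the record is well formed.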
*/ xfs_failaddr_t xfs_refcount_check_irec( struct xfs_perag *pag, const struct xfs_refcount_irec *irec) { if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX) return __this_address; if (!xfs_refcount_check_domain(irec)) return __this_address; /* check for valid extent range, including overflow */ if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) return __this_address; if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX) return __this_address; return NULL; } xfs_failaddr_t xfs_rtrefcount_check_irec( struct xfs_rtgroup *rtg, const struct xfs_refcount_irec *irec) { if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX) return __this_address; if (!xfs_refcount_check_domain(irec)) return __this_address; /* check for valid extent range, including overflow */ if (!xfs_verify_rgbext(rtg, irec->rc_startblock, irec->rc_blockcount)) return __this_address; if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX) return __this_address; return NULL; } static inline xfs_failaddr_t xfs_refcount_check_btrec( struct xfs_btree_cur *cur, const struct xfs_refcount_irec *irec) { if (xfs_btree_is_rtrefcount(cur->bc_ops)) return xfs_rtrefcount_check_irec(to_rtg(cur->bc_group), irec); return xfs_refcount_check_irec(to_perag(cur->bc_group), irec); } static inline int xfs_refcount_complain_bad_rec( struct xfs_btree_cur *cur, xfs_failaddr_t fa, const struct xfs_refcount_irec *irec) { struct xfs_mount *mp = cur->bc_mp; if (xfs_btree_is_rtrefcount(cur->bc_ops)) { xfs_warn(mp, "RT Refcount BTree record corruption in rtgroup %u detected at %pS!", cur->bc_group->xg_gno, fa); } else { xfs_warn(mp, "Refcount BTree record corruption in AG %d detected at %pS!", cur->bc_group->xg_gno, fa); } xfs_warn(mp, "Start block 0x%x, block count 0x%x, references 0x%x", irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } /* * Get the data from the pointed-to record. */ int xfs_refcount_get_rec( struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat) { union xfs_btree_rec *rec; xfs_failaddr_t fa; int error; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) return error; xfs_refcount_btrec_to_irec(rec, irec); fa = xfs_refcount_check_btrec(cur, irec); if (fa) return xfs_refcount_complain_bad_rec(cur, fa, irec); trace_xfs_refcount_get(cur, irec); return 0; } /* * Update the record referred to by cur to the value given * by [bno, len, refcount]. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int xfs_refcount_update( struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec) { union xfs_btree_rec rec; uint32_t start; int error; trace_xfs_refcount_update(cur, irec); start = xfs_refcount_encode_startblock(irec->rc_startblock, irec->rc_domain); rec.refc.rc_startblock = cpu_to_be32(start); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur, error, _RET_IP_); return error; } /* * Insert the record referred to by cur to the value given * by [bno, len, refcount]. * This either works (return 0) or gets an EFSCORRUPTED error. 
*/ int xfs_refcount_insert( struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *i) { int error; trace_xfs_refcount_insert(cur, irec); cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; cur->bc_rec.rc.rc_domain = irec->rc_domain; error = xfs_btree_insert(cur, i); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } out_error: if (error) trace_xfs_refcount_insert_error(cur, error, _RET_IP_); return error; } /* * Remove the record referred to by cur, then set the pointer to the spot * where the record could be re-inserted, in case we want to increment or * decrement the cursor. * This either works (return 0) or gets an EFSCORRUPTED error. */ STATIC int xfs_refcount_delete( struct xfs_btree_cur *cur, int *i) { struct xfs_refcount_irec irec; int found_rec; int error; error = xfs_refcount_get_rec(cur, &irec, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } trace_xfs_refcount_delete(cur, &irec); error = xfs_btree_delete(cur, i); if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (error) goto out_error; error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, &found_rec); out_error: if (error) trace_xfs_refcount_delete_error(cur, error, _RET_IP_); return error; } /* * Adjusting the Reference Count * * As stated elsewhere, the reference count btree (refcbt) stores * >1 reference counts for extents of physical blocks. In this * operation, we're either raising or lowering the reference count of * some subrange stored in the tree: * * <------ adjustment range ------> * ----+ +---+-----+ +--+--------+--------- * 2 | | 3 | 4 | |17| 55 | 10 * ----+ +---+-----+ +--+--------+--------- * X axis is physical blocks number; * reference counts are the numbers inside the rectangles * * The first thing we need to do is to ensure that there are no * refcount extents crossing either boundary of the range to be * adjusted. For any extent that does cross a boundary, split it into * two extents so that we can increment the refcount of one of the * pieces later: * * <------ adjustment range ------> * ----+ +---+-----+ +--+--------+----+---- * 2 | | 3 | 2 | |17| 55 | 10 | 10 * ----+ +---+-----+ +--+--------+----+---- * * For this next step, let's assume that all the physical blocks in * the adjustment range are mapped to a file and are therefore in use * at least once. Therefore, we can infer that any gap in the * refcount tree within the adjustment range represents a physical * extent with refcount == 1: * * <------ adjustment range ------> * ----+---+---+-----+-+--+--------+----+---- * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 * ----+---+---+-----+-+--+--------+----+---- * ^ * * For each extent that falls within the interval range, figure out * which extent is to the left or the right of that extent. Now we * have a left, current, and right extent. If the new reference count * of the center extent enables us to merge left, center, and right * into one record covering all three, do so. If the center extent is * at the left end of the range, abuts the left extent, and its new * reference count matches the left extent's record, then merge them. 
* If the center extent is at the right end of the range, abuts the * right extent, and the reference counts match, merge those. In the * example, we can left merge (assuming an increment operation): * * <------ adjustment range ------> * --------+---+-----+-+--+--------+----+---- * 2 | 3 | 2 |1|17| 55 | 10 | 10 * --------+---+-----+-+--+--------+----+---- * ^ * * For all other extents within the range, adjust the reference count * or delete it if the refcount falls below 2. If we were * incrementing, the end result looks like this: * * <------ adjustment range ------> * --------+---+-----+-+--+--------+----+---- * 2 | 4 | 3 |2|18| 56 | 11 | 10 * --------+---+-----+-+--+--------+----+---- * * The result of a decrement operation looks as such: * * <------ adjustment range ------> * ----+ +---+ +--+--------+----+---- * 2 | | 2 | |16| 54 | 9 | 10 * ----+ +---+ +--+--------+----+---- * DDDD 111111DD * * The blocks marked "D" are freed; the blocks marked "1" are only * referenced once and therefore the record is removed from the * refcount btree. */ /* Next block after this extent. */ static inline xfs_agblock_t xfs_refc_next( struct xfs_refcount_irec *rc) { return rc->rc_startblock + rc->rc_blockcount; } /* * Split a refcount extent that crosses agbno. */ STATIC int xfs_refcount_split_extent( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t agbno, bool *shape_changed) { struct xfs_refcount_irec rcext, tmp; int found_rec; int error; *shape_changed = false; error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); if (error) goto out_error; if (!found_rec) return 0; error = xfs_refcount_get_rec(cur, &rcext, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (rcext.rc_domain != domain) return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; *shape_changed = true; trace_xfs_refcount_split_extent(cur, &rcext, agbno); /* Establish the right extent. */ tmp = rcext; tmp.rc_startblock = agbno; tmp.rc_blockcount -= (agbno - rcext.rc_startblock); error = xfs_refcount_update(cur, &tmp); if (error) goto out_error; /* Insert the left extent. */ tmp = rcext; tmp.rc_blockcount = agbno - rcext.rc_startblock; error = xfs_refcount_insert(cur, &tmp, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } return error; out_error: trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_); return error; } /* * Merge the left, center, and right extents. */ STATIC int xfs_refcount_merge_center_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *center, struct xfs_refcount_irec *right, unsigned long long extlen, xfs_extlen_t *aglen) { int error; int found_rec; trace_xfs_refcount_merge_center_extents(cur, left, center, right); ASSERT(left->rc_domain == center->rc_domain); ASSERT(right->rc_domain == center->rc_domain); /* * Make sure the center and right extents are not in the btree. * If the center extent was synthesized, the first delete call * removes the right extent and we skip the second deletion. * If center and right were in the btree, then the first delete * call removes the center and the second one removes the right * extent. 
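* In either case the cursor is then repositioned on the left record, whose
 * block count is stretched to cover the entire merged range.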
*/ error = xfs_refcount_lookup_ge(cur, center->rc_domain, center->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (center->rc_refcount > 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } /* Enlarge the left extent. */ error = xfs_refcount_lookup_le(cur, left->rc_domain, left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } left->rc_blockcount = extlen; error = xfs_refcount_update(cur, left); if (error) goto out_error; *aglen = 0; return error; out_error: trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_); return error; } /* * Merge with the left extent. */ STATIC int xfs_refcount_merge_left_extent( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, xfs_agblock_t *agbno, xfs_extlen_t *aglen) { int error; int found_rec; trace_xfs_refcount_merge_left_extent(cur, left, cleft); ASSERT(left->rc_domain == cleft->rc_domain); /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { error = xfs_refcount_lookup_le(cur, cleft->rc_domain, cleft->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } /* Enlarge the left extent. */ error = xfs_refcount_lookup_le(cur, left->rc_domain, left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } left->rc_blockcount += cleft->rc_blockcount; error = xfs_refcount_update(cur, left); if (error) goto out_error; *agbno += cleft->rc_blockcount; *aglen -= cleft->rc_blockcount; return error; out_error: trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_); return error; } /* * Merge with the right extent. */ STATIC int xfs_refcount_merge_right_extent( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, xfs_extlen_t *aglen) { int error; int found_rec; trace_xfs_refcount_merge_right_extent(cur, cright, right); ASSERT(right->rc_domain == cright->rc_domain); /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, * remove it. */ if (cright->rc_refcount > 1) { error = xfs_refcount_lookup_le(cur, cright->rc_domain, cright->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } /* Enlarge the right extent. 
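* The right record absorbs cright by moving its start block down by
 * cright's length and growing its block count to match.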
*/ error = xfs_refcount_lookup_le(cur, right->rc_domain, right->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } right->rc_startblock -= cright->rc_blockcount; right->rc_blockcount += cright->rc_blockcount; error = xfs_refcount_update(cur, right); if (error) goto out_error; *aglen -= cright->rc_blockcount; return error; out_error: trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_); return error; } /* * Find the left extent and the one after it (cleft). This function assumes * that we've already split any extent crossing agbno. */ STATIC int xfs_refcount_find_left_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, enum xfs_refc_domain domain, xfs_agblock_t agbno, xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) goto out_error; if (!found_rec) return 0; error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != domain) return 0; if (xfs_refc_next(&tmp) != agbno) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; error = xfs_btree_increment(cur, 0, &found_rec); if (error) goto out_error; if (found_rec) { error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != domain) goto not_found; /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; else { /* * There's a gap in the refcntbt at the start of the * range we're interested in (refcount == 1) so * synthesize the implied extent and pass it back. * We assume here that the agbno/aglen range was * passed in from a data fork extent mapping and * therefore is allocated to exactly one owner. */ cleft->rc_startblock = agbno; cleft->rc_blockcount = min(aglen, tmp.rc_startblock - agbno); cleft->rc_refcount = 1; cleft->rc_domain = domain; } } else { not_found: /* * No extents, so pretend that there's one covering the whole * range. */ cleft->rc_startblock = agbno; cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; cleft->rc_domain = domain; } trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno); return error; out_error: trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_); return error; } /* * Find the right extent and the one before it (cright). This function * assumes that we've already split any extents crossing agbno + aglen. 
*/ STATIC int xfs_refcount_find_right_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, enum xfs_refc_domain domain, xfs_agblock_t agbno, xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; int error; int found_rec; right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) goto out_error; if (!found_rec) return 0; error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != domain) return 0; if (tmp.rc_startblock != agbno + aglen) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; error = xfs_btree_decrement(cur, 0, &found_rec); if (error) goto out_error; if (found_rec) { error = xfs_refcount_get_rec(cur, &tmp, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != domain) goto not_found; /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; else { /* * There's a gap in the refcntbt at the end of the * range we're interested in (refcount == 1) so * create the implied extent and pass it back. * We assume here that the agbno/aglen range was * passed in from a data fork extent mapping and * therefore is allocated to exactly one owner. */ cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); cright->rc_blockcount = right->rc_startblock - cright->rc_startblock; cright->rc_refcount = 1; cright->rc_domain = domain; } } else { not_found: /* * No extents, so pretend that there's one covering the whole * range. */ cright->rc_startblock = agbno; cright->rc_blockcount = aglen; cright->rc_refcount = 1; cright->rc_domain = domain; } trace_xfs_refcount_find_right_extent(cur, cright, right, agbno + aglen); return error; out_error: trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_); return error; } /* Is this extent valid? */ static inline bool xfs_refc_valid( const struct xfs_refcount_irec *rc) { return rc->rc_startblock != NULLAGBLOCK; } static inline xfs_nlink_t xfs_refc_merge_refcount( const struct xfs_refcount_irec *irec, enum xfs_refc_adjust_op adjust) { /* Once a record hits XFS_REFC_REFCOUNT_MAX, it is pinned forever */ if (irec->rc_refcount == XFS_REFC_REFCOUNT_MAX) return XFS_REFC_REFCOUNT_MAX; return irec->rc_refcount + adjust; } static inline bool xfs_refc_want_merge_center( const struct xfs_refcount_irec *left, const struct xfs_refcount_irec *cleft, const struct xfs_refcount_irec *cright, const struct xfs_refcount_irec *right, bool cleft_is_cright, enum xfs_refc_adjust_op adjust, unsigned long long *ulenp) { unsigned long long ulen = left->rc_blockcount; xfs_nlink_t new_refcount; /* * To merge with a center record, both shoulder records must be * adjacent to the record we want to adjust. This is only true if * find_left and find_right made all four records valid. */ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) return false; /* There must only be one record for the entire range. */ if (!cleft_is_cright) return false; /* The shoulder record refcounts must match the new refcount. 
*/ new_refcount = xfs_refc_merge_refcount(cleft, adjust); if (left->rc_refcount != new_refcount) return false; if (right->rc_refcount != new_refcount) return false; /* * The new record cannot exceed the max length. ulen is a ULL as the * individual record block counts can be up to (u32 - 1) in length * hence we need to catch u32 addition overflows here. */ ulen += cleft->rc_blockcount + right->rc_blockcount; if (ulen >= XFS_REFC_LEN_MAX) return false; *ulenp = ulen; return true; } static inline bool xfs_refc_want_merge_left( const struct xfs_refcount_irec *left, const struct xfs_refcount_irec *cleft, enum xfs_refc_adjust_op adjust) { unsigned long long ulen = left->rc_blockcount; xfs_nlink_t new_refcount; /* * For a left merge, the left shoulder record must be adjacent to the * start of the range. If this is true, find_left made left and cleft * contain valid contents. */ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) return false; /* Left shoulder record refcount must match the new refcount. */ new_refcount = xfs_refc_merge_refcount(cleft, adjust); if (left->rc_refcount != new_refcount) return false; /* * The new record cannot exceed the max length. ulen is a ULL as the * individual record block counts can be up to (u32 - 1) in length * hence we need to catch u32 addition overflows here. */ ulen += cleft->rc_blockcount; if (ulen >= XFS_REFC_LEN_MAX) return false; return true; } static inline bool xfs_refc_want_merge_right( const struct xfs_refcount_irec *cright, const struct xfs_refcount_irec *right, enum xfs_refc_adjust_op adjust) { unsigned long long ulen = right->rc_blockcount; xfs_nlink_t new_refcount; /* * For a right merge, the right shoulder record must be adjacent to the * end of the range. If this is true, find_right made cright and right * contain valid contents. */ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) return false; /* Right shoulder record refcount must match the new refcount. */ new_refcount = xfs_refc_merge_refcount(cright, adjust); if (right->rc_refcount != new_refcount) return false; /* * The new record cannot exceed the max length. ulen is a ULL as the * individual record block counts can be up to (u32 - 1) in length * hence we need to catch u32 addition overflows here. */ ulen += cright->rc_blockcount; if (ulen >= XFS_REFC_LEN_MAX) return false; return true; } /* * Try to merge with any extents on the boundaries of the adjustment range. */ STATIC int xfs_refcount_merge_extents( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adjust, bool *shape_changed) { struct xfs_refcount_irec left = {0}, cleft = {0}; struct xfs_refcount_irec cright = {0}, right = {0}; int error; unsigned long long ulen; bool cequal; *shape_changed = false; /* * Find the extent just below agbno [left], just above agbno [cleft], * just below (agbno + aglen) [cright], and just above (agbno + aglen) * [right]. */ error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, *agbno, *aglen); if (error) return error; error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, *agbno, *aglen); if (error) return error; /* No left or right extent to merge; exit. */ if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) return 0; cequal = (cleft.rc_startblock == cright.rc_startblock) && (cleft.rc_blockcount == cright.rc_blockcount); /* Try to merge left, cleft, and right. cleft must == cright. 
*/ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, adjust, &ulen)) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, &right, ulen, aglen); } /* Try to merge left and cleft. */ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { *shape_changed = true; error = xfs_refcount_merge_left_extent(cur, &left, &cleft, agbno, aglen); if (error) return error; /* * If we just merged left + cleft and cleft == cright, * we no longer have a cright to merge with right. We're done. */ if (cequal) return 0; } /* Try to merge cright and right. */ if (xfs_refc_want_merge_right(&cright, &right, adjust)) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, aglen); } return 0; } /* * XXX: This is a pretty hand-wavy estimate. The penalty for guessing * true incorrectly is a shutdown FS; the penalty for guessing false * incorrectly is more transaction rolls than might be necessary. * Be conservative here. */ static bool xfs_refcount_still_have_space( struct xfs_btree_cur *cur) { unsigned long overhead; /* * Worst case estimate: full splits of the free space and rmap btrees * to handle each of the shape changes to the refcount btree. */ overhead = xfs_allocfree_block_count(cur->bc_mp, cur->bc_refc.shape_changes); overhead += cur->bc_maxlevels; overhead *= cur->bc_mp->m_sb.sb_blocksize; /* * Only allow 2 refcount extent updates per transaction if the * refcount continue update "error" has been injected. */ if (cur->bc_refc.nr_ops > 2 && XFS_TEST_ERROR(false, cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) return false; if (cur->bc_refc.nr_ops == 0) return true; else if (overhead > cur->bc_tp->t_log_res) return false; return cur->bc_tp->t_log_res - overhead > cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* Schedule an extent free. */ static int xrefc_free_extent( struct xfs_btree_cur *cur, struct xfs_refcount_irec *rec) { unsigned int flags = 0; if (xfs_btree_is_rtrefcount(cur->bc_ops)) flags |= XFS_FREE_EXTENT_REALTIME; return xfs_free_extent_later(cur->bc_tp, xfs_gbno_to_fsb(cur->bc_group, rec->rc_startblock), rec->rc_blockcount, NULL, XFS_AG_RESV_NONE, flags); } /* * Adjust the refcounts of middle extents. At this point we should have * split extents that crossed the adjustment range; merged with adjacent * extents; and updated agbno/aglen to reflect the merges. Therefore, * all we have to do is update the extents inside [agbno, agbno + aglen]. */ STATIC int xfs_refcount_adjust_extents( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; int found_rec, found_tmp; /* Merging did all the work already. */ if (*aglen == 0) return 0; error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = xfs_group_max_blocks(cur->bc_group); ext.rc_blockcount = 0; ext.rc_refcount = 0; ext.rc_domain = XFS_REFC_DOMAIN_SHARED; } /* * Deal with a hole in the refcount tree; if a file maps to * these blocks and there's no refcountbt record, pretend that * there is one with refcount == 1. 
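* The synthesized record is then either inserted with its adjusted count
 * (an increment makes it 2) or, on a decrement, its blocks are freed
 * outright rather than stored with a zero refcount.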
*/ if (ext.rc_startblock != *agbno) { tmp.rc_startblock = *agbno; tmp.rc_blockcount = min(*aglen, ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; trace_xfs_refcount_modify_extent(cur, &tmp); /* * Either cover the hole (increment) or * delete the range (decrement). */ cur->bc_refc.nr_ops++; if (tmp.rc_refcount) { error = xfs_refcount_insert(cur, &tmp, &found_tmp); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } } else { error = xrefc_free_extent(cur, &tmp); if (error) goto out_error; } (*agbno) += tmp.rc_blockcount; (*aglen) -= tmp.rc_blockcount; /* Stop if there's nothing left to modify */ if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) break; /* Move the cursor to the start of ext. */ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; } /* * A previous step trimmed agbno/aglen such that the end of the * range would not be in the middle of the record. If this is * no longer the case, something is seriously wrong with the * btree. Make sure we never feed the synthesized record into * the processing loop below. */ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* * Adjust the reference count and either update the tree * (incr) or free the blocks (decr). */ if (ext.rc_refcount == XFS_REFC_REFCOUNT_MAX) goto skip; ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur, &ext); cur->bc_refc.nr_ops++; if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) goto out_error; } else if (ext.rc_refcount == 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } goto advloop; } else { error = xrefc_free_extent(cur, &ext); if (error) goto out_error; } skip: error = xfs_btree_increment(cur, 0, &found_rec); if (error) goto out_error; advloop: (*agbno) += ext.rc_blockcount; (*aglen) -= ext.rc_blockcount; } return error; out_error: trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_); return error; } /* Adjust the reference count of a range of AG blocks. */ STATIC int xfs_refcount_adjust( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; int error; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) trace_xfs_refcount_increase(cur, *agbno, *aglen); else trace_xfs_refcount_decrease(cur, *agbno, *aglen); /* * Ensure that no rcextents cross the boundary of the adjustment range. */ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, *agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, *agbno + *aglen, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; /* * Try to merge with the left or right extents of the range. 
*/ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, agbno, aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; if (shape_changes) cur->bc_refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); if (error) goto out_error; return 0; out_error: trace_xfs_refcount_adjust_error(cur, error, _RET_IP_); return error; } /* * Set up a continuation of a deferred refcount operation by updating the * intent. Checks to make sure we're not going to run off the end of the AG. */ static inline int xfs_refcount_continue_op( struct xfs_btree_cur *cur, struct xfs_refcount_intent *ri, xfs_agblock_t new_agbno) { struct xfs_mount *mp = cur->bc_mp; struct xfs_perag *pag = to_perag(cur->bc_group); if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, ri->ri_blockcount))) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno); ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); return 0; } /* * Process one of the deferred refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. * This saves time and eliminates a buffer deadlock between the * superblock and the AGF because we'll always grab them in the same * order. */ int xfs_refcount_finish_one( struct xfs_trans *tp, struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *rcur = *pcur; struct xfs_buf *agbp = NULL; int error = 0; xfs_agblock_t bno; unsigned long nr_ops = 0; int shape_changes = 0; bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); trace_xfs_refcount_deferred(mp, ri); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) return -EIO; /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ if (rcur != NULL && rcur->bc_group != ri->ri_group) { nr_ops = rcur->bc_refc.nr_ops; shape_changes = rcur->bc_refc.shape_changes; xfs_btree_del_cursor(rcur, 0); rcur = NULL; *pcur = NULL; } if (rcur == NULL) { struct xfs_perag *pag = to_perag(ri->ri_group); error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING, &agbp); if (error) return error; *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); rcur->bc_refc.nr_ops = nr_ops; rcur->bc_refc.shape_changes = shape_changes; } switch (ri->ri_type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_INCREASE); if (error) return error; if (ri->ri_blockcount > 0) error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_DECREASE); if (error) return error; if (ri->ri_blockcount > 0) error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_ALLOC_COW: error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); if (error) return error; ri->ri_blockcount = 0; break; case XFS_REFCOUNT_FREE_COW: error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); if (error) return error; ri->ri_blockcount = 0; break; default: ASSERT(0); return -EFSCORRUPTED; } if (!error && ri->ri_blockcount > 0) trace_xfs_refcount_finish_one_leftover(mp, ri); return error; } /* * Set up a continuation of a deferred rtrefcount operation by updating the * intent.
Checks to make sure we're not going to run off the end of the * rtgroup. */ static inline int xfs_rtrefcount_continue_op( struct xfs_btree_cur *cur, struct xfs_refcount_intent *ri, xfs_agblock_t new_agbno) { struct xfs_mount *mp = cur->bc_mp; struct xfs_rtgroup *rtg = to_rtg(ri->ri_group); if (XFS_IS_CORRUPT(mp, !xfs_verify_rgbext(rtg, new_agbno, ri->ri_blockcount))) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } ri->ri_startblock = xfs_rgbno_to_rtb(rtg, new_agbno); ASSERT(xfs_verify_rtbext(mp, ri->ri_startblock, ri->ri_blockcount)); return 0; } /* * Process one of the deferred realtime refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. */ int xfs_rtrefcount_finish_one( struct xfs_trans *tp, struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = to_rtg(ri->ri_group); struct xfs_btree_cur *rcur = *pcur; int error = 0; xfs_rgblock_t bno; unsigned long nr_ops = 0; int shape_changes = 0; bno = xfs_rtb_to_rgbno(mp, ri->ri_startblock); trace_xfs_refcount_deferred(mp, ri); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) return -EIO; /* * If we haven't gotten a cursor or the cursor AG doesn't match * the startblock, get one now. */ if (rcur != NULL && rcur->bc_group != ri->ri_group) { nr_ops = rcur->bc_refc.nr_ops; shape_changes = rcur->bc_refc.shape_changes; xfs_btree_del_cursor(rcur, 0); rcur = NULL; *pcur = NULL; } if (rcur == NULL) { xfs_rtgroup_lock(rtg, XFS_RTGLOCK_REFCOUNT); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_REFCOUNT); *pcur = rcur = xfs_rtrefcountbt_init_cursor(tp, rtg); rcur->bc_refc.nr_ops = nr_ops; rcur->bc_refc.shape_changes = shape_changes; } switch (ri->ri_type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_INCREASE); if (error) return error; if (ri->ri_blockcount > 0) error = xfs_rtrefcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, XFS_REFCOUNT_ADJUST_DECREASE); if (error) return error; if (ri->ri_blockcount > 0) error = xfs_rtrefcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_ALLOC_COW: error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); if (error) return error; ri->ri_blockcount = 0; break; case XFS_REFCOUNT_FREE_COW: error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); if (error) return error; ri->ri_blockcount = 0; break; default: ASSERT(0); return -EFSCORRUPTED; } if (!error && ri->ri_blockcount > 0) trace_xfs_refcount_finish_one_leftover(mp, ri); return error; } /* * Record a refcount intent for later processing. */ static void __xfs_refcount_add( struct xfs_trans *tp, enum xfs_refcount_intent_type type, bool isrt, xfs_fsblock_t startblock, xfs_extlen_t blockcount) { struct xfs_refcount_intent *ri; ri = kmem_cache_alloc(xfs_refcount_intent_cache, GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; ri->ri_blockcount = blockcount; ri->ri_realtime = isrt; xfs_refcount_defer_add(tp, ri); } /* * Increase the reference count of the blocks backing a file's extent. */ void xfs_refcount_increase_extent( struct xfs_trans *tp, bool isrt, struct xfs_bmbt_irec *PREV) { if (!xfs_has_reflink(tp->t_mountp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, isrt, PREV->br_startblock, PREV->br_blockcount); } /* * Decrease the reference count of the blocks backing a file's extent. 
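 * Like the increase case, this only records an intent; the refcount
 * btree is not touched until the deferred op is processed by
 * xfs_refcount_finish_one().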
*/ void xfs_refcount_decrease_extent( struct xfs_trans *tp, bool isrt, struct xfs_bmbt_irec *PREV) { if (!xfs_has_reflink(tp->t_mountp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, isrt, PREV->br_startblock, PREV->br_blockcount); } /* * Given an AG extent, find the lowest-numbered run of shared blocks * within that range and return the range in fbno/flen. If * find_end_of_shared is set, return the longest contiguous extent of * shared blocks; if not, just return the first extent we find. If no * shared blocks are found, fbno and flen will be set to NULLAGBLOCK * and 0, respectively. */ int xfs_refcount_find_shared( struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_end_of_shared) { struct xfs_refcount_irec tmp; int i; int have; int error; trace_xfs_refcount_find_shared(cur, agbno, aglen); /* By default, skip the whole range */ *fbno = NULLAGBLOCK; *flen = 0; /* Try to find a refcount extent that crosses the start */ error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, &have); if (error) goto out_error; if (!have) { /* No left extent, look at the next one */ error = xfs_btree_increment(cur, 0, &have); if (error) goto out_error; if (!have) goto done; } error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { error = xfs_btree_increment(cur, 0, &have); if (error) goto out_error; if (!have) goto done; error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) goto done; } /* If the extent starts after the range we want, bail out */ if (tmp.rc_startblock >= agbno + aglen) goto done; /* We found the start of a shared extent! */ if (tmp.rc_startblock < agbno) { tmp.rc_blockcount -= (agbno - tmp.rc_startblock); tmp.rc_startblock = agbno; } *fbno = tmp.rc_startblock; *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); if (!find_end_of_shared) goto done; /* Otherwise, find the end of this shared extent */ while (*fbno + *flen < agbno + aglen) { error = xfs_btree_increment(cur, 0, &have); if (error) goto out_error; if (!have) break; error = xfs_refcount_get_rec(cur, &tmp, &i); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); } done: trace_xfs_refcount_find_shared_result(cur, *fbno, *flen); out_error: if (error) trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_); return error; } /* * Recovering CoW Blocks After a Crash * * Due to the way that the copy on write mechanism works, there's a window of * opportunity in which we can lose track of allocated blocks during a crash. * Because CoW uses delayed allocation in the in-core CoW fork, writeback * causes blocks to be allocated and stored in the CoW fork. The blocks are * no longer in the free space btree but are not otherwise recorded anywhere * until the write completes and the blocks are mapped into the file. 
A crash * in between allocation and remapping results in the replacement blocks being * lost. This situation is exacerbated by the CoW extent size hint because * allocations can hang around for long time. * * However, there is a place where we can record these allocations before they * become mappings -- the reference count btree. The btree does not record * extents with refcount == 1, so we can record allocations with a refcount of * 1. Blocks being used for CoW writeout cannot be shared, so there should be * no conflict with shared block records. These mappings should be created * when we allocate blocks to the CoW fork and deleted when they're removed * from the CoW fork. * * Minor nit: records for in-progress CoW allocations and records for shared * extents must never be merged, to preserve the property that (except for CoW * allocations) there are no refcount btree entries with refcount == 1. The * only time this could potentially happen is when unsharing a block that's * adjacent to CoW allocations, so we must be careful to avoid this. * * At mount time we recover lost CoW allocations by searching the refcount * btree for these refcount == 1 mappings. These represent CoW allocations * that were in progress at the time the filesystem went down, so we can free * them to get the space back. * * This mechanism is superior to creating EFIs for unmapped CoW extents for * several reasons -- first, EFIs pin the tail of the log and would have to be * periodically relogged to avoid filling up the log. Second, CoW completions * will have to file an EFD and create new EFIs for whatever remains in the * CoW fork; this partially takes care of (1) but extent-size reservations * will have to periodically relog even if there's no writeout in progress. * This can happen if the CoW extent size hint is set, which you really want. * Third, EFIs cannot currently be automatically relogged into newer * transactions to advance the log tail. Fourth, stuffing the log full of * EFIs places an upper bound on the number of CoW allocations that can be * held filesystem-wide at any given time. Recording them in the refcount * btree doesn't require us to maintain any state in memory and doesn't pin * the log. */ /* * Adjust the refcounts of CoW allocations. These allocations are "magic" * in that they're not referenced anywhere else in the filesystem, so we * stash them in the refcount btree with a refcount of 1 until either file * remapping (or CoW cancellation) happens. */ STATIC int xfs_refcount_adjust_cow_extents( struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; int found_rec, found_tmp; if (aglen == 0) return 0; /* Find any overlapping refcount records */ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, &found_rec); if (error) goto out_error; error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && ext.rc_domain != XFS_REFC_DOMAIN_COW)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (!found_rec) { ext.rc_startblock = xfs_group_max_blocks(cur->bc_group); ext.rc_blockcount = 0; ext.rc_refcount = 0; ext.rc_domain = XFS_REFC_DOMAIN_COW; } switch (adj) { case XFS_REFCOUNT_ADJUST_COW_ALLOC: /* Adding a CoW reservation, there should be nothing here. 
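 * The lookup above returned the first CoW record at or after agbno (or
 * a synthesized sentinel at the end of the group), so any record
 * starting before agbno + aglen would overlap the new reservation.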
*/ if (XFS_IS_CORRUPT(cur->bc_mp, agbno + aglen > ext.rc_startblock)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; tmp.rc_domain = XFS_REFC_DOMAIN_COW; trace_xfs_refcount_modify_extent(cur, &tmp); error = xfs_refcount_insert(cur, &tmp, &found_tmp); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } break; case XFS_REFCOUNT_ADJUST_COW_FREE: /* Removing a CoW reservation, there should be one extent. */ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } ext.rc_refcount = 0; trace_xfs_refcount_modify_extent(cur, &ext); error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } break; default: ASSERT(0); } return error; out_error: trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_); return error; } /* * Add or remove refcount btree entries for CoW reservations. */ STATIC int xfs_refcount_adjust_cow( struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, enum xfs_refc_adjust_op adj) { bool shape_changed; int error; /* * Ensure that no rcextents cross the boundary of the adjustment range. */ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, agbno, &shape_changed); if (error) goto out_error; error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, agbno + aglen, &shape_changed); if (error) goto out_error; /* * Try to merge with the left or right extents of the range. */ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, &aglen, adj, &shape_changed); if (error) goto out_error; /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); if (error) goto out_error; return 0; out_error: trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_); return error; } /* * Record a CoW allocation in the refcount btree. */ STATIC int __xfs_refcount_cow_alloc( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t aglen) { trace_xfs_refcount_cow_increase(rcur, agbno, aglen); /* Add refcount btree reservation */ return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC); } /* * Remove a CoW allocation from the refcount btree. */ STATIC int __xfs_refcount_cow_free( struct xfs_btree_cur *rcur, xfs_agblock_t agbno, xfs_extlen_t aglen) { trace_xfs_refcount_cow_decrease(rcur, agbno, aglen); /* Remove refcount btree reservation */ return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE); } /* Record a CoW staging extent in the refcount btree. */ void xfs_refcount_alloc_cow_extent( struct xfs_trans *tp, bool isrt, xfs_fsblock_t fsb, xfs_extlen_t len) { struct xfs_mount *mp = tp->t_mountp; if (!xfs_has_reflink(mp)) return; __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, isrt, fsb, len); /* Add rmap entry */ xfs_rmap_alloc_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. 
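 * Both the XFS_RMAP_OWN_COW rmap update and the staged refcount update
 * are queued as deferred ops on the same transaction, keeping the two
 * btrees coordinated.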
*/ void xfs_refcount_free_cow_extent( struct xfs_trans *tp, bool isrt, xfs_fsblock_t fsb, xfs_extlen_t len) { struct xfs_mount *mp = tp->t_mountp; if (!xfs_has_reflink(mp)) return; /* Remove rmap entry */ xfs_rmap_free_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW); __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, isrt, fsb, len); } struct xfs_refcount_recovery { struct list_head rr_list; struct xfs_refcount_irec rr_rrec; }; /* Stuff an extent on the recovery list. */ STATIC int xfs_refcount_recover_extent( struct xfs_btree_cur *cur, const union xfs_btree_rec *rec, void *priv) { struct list_head *debris = priv; struct xfs_refcount_recovery *rr; if (XFS_IS_CORRUPT(cur->bc_mp, be32_to_cpu(rec->refc.rc_refcount) != 1)) { xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } rr = kmalloc(sizeof(struct xfs_refcount_recovery), GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); if (xfs_refcount_check_btrec(cur, &rr->rr_rrec) != NULL || XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { xfs_btree_mark_sick(cur); kfree(rr); return -EFSCORRUPTED; } list_add_tail(&rr->rr_list, debris); return 0; } /* Find and remove leftover CoW reservations. */ int xfs_refcount_recover_cow_leftovers( struct xfs_group *xg) { struct xfs_mount *mp = xg->xg_mount; bool isrt = xg->xg_type == XG_TYPE_RTG; struct xfs_trans *tp; struct xfs_btree_cur *cur; struct xfs_buf *agbp = NULL; struct xfs_refcount_recovery *rr, *n; struct list_head debris; union xfs_btree_irec low = { .rc.rc_domain = XFS_REFC_DOMAIN_COW, }; union xfs_btree_irec high = { .rc.rc_domain = XFS_REFC_DOMAIN_COW, .rc.rc_startblock = -1U, }; xfs_fsblock_t fsb; int error; /* reflink filesystems must not have groups larger than 2^31-1 blocks */ BUILD_BUG_ON(XFS_MAX_RGBLOCKS >= XFS_REFC_COWFLAG); BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); if (isrt) { if (!xfs_has_rtgroups(mp)) return 0; if (xfs_group_max_blocks(xg) >= XFS_MAX_RGBLOCKS) return -EOPNOTSUPP; } else { if (xfs_group_max_blocks(xg) > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; } INIT_LIST_HEAD(&debris); /* * In this first part, we use an empty transaction to gather up * all the leftover CoW extents so that we can subsequently * delete them. The empty transaction is used to avoid * a buffer lock deadlock if there happens to be a loop in the * refcountbt because we're allowed to re-grab a buffer that is * already attached to our transaction. When we're done * recording the CoW debris we cancel the (empty) transaction * and everything goes away cleanly. */ error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; if (isrt) { xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT); cur = xfs_rtrefcountbt_init_cursor(tp, to_rtg(xg)); } else { error = xfs_alloc_read_agf(to_perag(xg), tp, 0, &agbp); if (error) goto out_trans; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, to_perag(xg)); } /* Find all the leftover CoW staging extents. */ error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); xfs_btree_del_cursor(cur, error); if (agbp) xfs_trans_brelse(tp, agbp); else xfs_rtgroup_unlock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT); xfs_trans_cancel(tp); if (error) goto out_free; /* Now iterate the list to free the leftovers */ list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. 
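 * Each leftover record is freed in its own transaction so that work
 * already committed survives if a later record fails.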
*/ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) goto out_free; /* Free the orphan record */ fsb = xfs_gbno_to_fsb(xg, rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, isrt, fsb, rr->rr_rrec.rc_blockcount); /* Free the block. */ error = xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL, XFS_AG_RESV_NONE, isrt ? XFS_FREE_EXTENT_REALTIME : 0); if (error) goto out_trans; error = xfs_trans_commit(tp); if (error) goto out_free; list_del(&rr->rr_list); kfree(rr); } return error; out_trans: xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { list_del(&rr->rr_list); kfree(rr); } return error; } /* * Scan part of the keyspace of the refcount records and tell us if the area * has no records, is fully mapped by records, or is partially filled. */ int xfs_refcount_has_records( struct xfs_btree_cur *cur, enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, enum xbtree_recpacking *outcome) { union xfs_btree_irec low; union xfs_btree_irec high; memset(&low, 0, sizeof(low)); low.rc.rc_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.rc.rc_startblock = bno + len - 1; low.rc.rc_domain = high.rc.rc_domain = domain; return xfs_btree_has_records(cur, &low, &high, NULL, outcome); } struct xfs_refcount_query_range_info { xfs_refcount_query_range_fn fn; void *priv; }; /* Format btree record and pass to our callback. */ STATIC int xfs_refcount_query_range_helper( struct xfs_btree_cur *cur, const union xfs_btree_rec *rec, void *priv) { struct xfs_refcount_query_range_info *query = priv; struct xfs_refcount_irec irec; xfs_failaddr_t fa; xfs_refcount_btrec_to_irec(rec, &irec); fa = xfs_refcount_check_btrec(cur, &irec); if (fa) return xfs_refcount_complain_bad_rec(cur, fa, &irec); return query->fn(cur, &irec, query->priv); } /* Find all refcount records between two keys. */ int xfs_refcount_query_range( struct xfs_btree_cur *cur, const struct xfs_refcount_irec *low_rec, const struct xfs_refcount_irec *high_rec, xfs_refcount_query_range_fn fn, void *priv) { union xfs_btree_irec low_brec = { .rc = *low_rec }; union xfs_btree_irec high_brec = { .rc = *high_rec }; struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn }; return xfs_btree_query_range(cur, &low_brec, &high_brec, xfs_refcount_query_range_helper, &query); } int __init xfs_refcount_intent_init_cache(void) { xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", sizeof(struct xfs_refcount_intent), 0, 0, NULL); return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; } void xfs_refcount_intent_destroy_cache(void) { kmem_cache_destroy(xfs_refcount_intent_cache); xfs_refcount_intent_cache = NULL; }
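/*
 * Editor's sketch (not part of the kernel tree): one way a caller might
 * use xfs_refcount_query_range() above to total up the shared blocks in
 * part of a group.  The tally struct, the helper, and
 * xfs_refcount_count_shared_blocks() are hypothetical names; only
 * xfs_refcount_query_range(), struct xfs_refcount_irec, and
 * XFS_REFC_DOMAIN_SHARED come from the code above.
 */
struct xfs_refcount_tally {
	xfs_extlen_t		shared_blocks;	/* blocks with refcount > 1 */
};

/* Per-record callback: accumulate the length of each shared extent. */
static int
xfs_refcount_tally_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec,
	void				*priv)
{
	struct xfs_refcount_tally	*tally = priv;

	if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED &&
	    irec->rc_refcount > 1)
		tally->shared_blocks += irec->rc_blockcount;
	return 0;
}

/* Total the shared blocks between two group block offsets. */
static int
xfs_refcount_count_shared_blocks(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			start,
	xfs_agblock_t			end,
	xfs_extlen_t			*blocks)
{
	struct xfs_refcount_tally	tally = { 0 };
	struct xfs_refcount_irec	low = {
		.rc_startblock		= start,
		.rc_domain		= XFS_REFC_DOMAIN_SHARED,
	};
	struct xfs_refcount_irec	high = {
		.rc_startblock		= end,
		.rc_domain		= XFS_REFC_DOMAIN_SHARED,
	};
	int				error;

	error = xfs_refcount_query_range(cur, &low, &high,
			xfs_refcount_tally_helper, &tally);
	if (!error)
		*blocks = tally.shared_blocks;
	return error;
}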
// SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel Unlabeled Support * * This file defines functions for dealing with unlabeled packets for the * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. 
* * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 */ #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/audit.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/security.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/net_namespace.h> #include <net/netlabel.h> #include <asm/bug.h> #include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_mgmt.h" /* NOTE: at present we always use init's network namespace since we don't * presently support different namespaces even though the majority of * the functions in this file are "namespace safe" */ /* The unlabeled connection hash table which we use to map network interfaces * and addresses of unlabeled packets to a user specified secid value for the * LSM. The hash table is used to lookup the network interface entry * (struct netlbl_unlhsh_iface) and then the interface entry is used to * lookup an IP address match from an ordered list. If a network interface * match can not be found in the hash table then the default entry * (netlbl_unlhsh_def) is used. The IP address entry list * (struct netlbl_unlhsh_addr) is ordered such that the entries with a * larger netmask come first. */ struct netlbl_unlhsh_tbl { struct list_head *tbl; u32 size; }; #define netlbl_unlhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr4, list) struct netlbl_unlhsh_addr4 { u32 secid; struct netlbl_af4list list; struct rcu_head rcu; }; #define netlbl_unlhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_unlhsh_addr6, list) struct netlbl_unlhsh_addr6 { u32 secid; struct netlbl_af6list list; struct rcu_head rcu; }; struct netlbl_unlhsh_iface { int ifindex; struct list_head addr4_list; struct list_head addr6_list; u32 valid; struct list_head list; struct rcu_head rcu; }; /* Argument struct for netlbl_unlhsh_walk() */ struct netlbl_unlhsh_walk_arg { struct netlink_callback *nl_cb; struct sk_buff *skb; u32 seq; }; /* Unlabeled connection hash table */ /* updates should be so rare that having one spinlock for the entire * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl __rcu *netlbl_unlhsh; static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; /* Accept unlabeled packets flag */ static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY, .len = sizeof(struct in_addr) }, [NLBL_UNLABEL_A_IFACE] = { .type = 
NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } }; /* * Unlabeled Connection Hash Table Functions */ /** * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table * @entry: the entry's RCU field * * Description: * This function is designed to be used as a callback to the call_rcu() * function so that memory allocated to a hash table interface entry can be * released safely. It is important to note that this function does not free * the IPv4 and IPv6 address lists contained as part of an interface entry. It * is up to the rest of the code to make sure an interface entry is only freed * once it's address lists are empty. * */ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) { struct netlbl_unlhsh_iface *iface; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); /* no need for locks here since we are the only one with access to this * structure */ netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_unlhsh_addr4_entry(iter4)); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_unlhsh_addr6_entry(iter6)); } #endif /* IPv6 */ kfree(iface); } /** * netlbl_unlhsh_hash - Hashing function for the hash table * @ifindex: the network interface/device to hash * * Description: * This is the hashing function for the unlabeled hash table, it returns the * bucket number for the given device/interface. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or * the hash table lock. * */ static u32 netlbl_unlhsh_hash(int ifindex) { return ifindex & (netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->size - 1); } /** * netlbl_unlhsh_search_iface - Search for a matching interface entry * @ifindex: the network interface * * Description: * Searches the unlabeled connection hash table and returns a pointer to the * interface entry which matches @ifindex, otherwise NULL is returned. The * caller is responsible for ensuring that the hash table is protected with * either a RCU read lock or the hash table lock. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) { u32 bkt; struct list_head *bkt_list; struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; list_for_each_entry_rcu(iter, bkt_list, list, lockdep_is_held(&netlbl_unlhsh_lock)) if (iter->valid && iter->ifindex == ifindex) return iter; return NULL; } /** * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table * @iface: the associated interface entry * @addr: IPv4 address in network byte order * @mask: IPv4 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. 
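 * The address is masked before insertion so later lookups can compare
 * masked values directly, and the list insert itself is serialized by
 * netlbl_unlhsh_lock.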
* */ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr4 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = addr->s_addr & mask->s_addr; entry->list.mask = mask->s_addr; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return ret_val; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table * @iface: the associated interface entry * @addr: IPv6 address in network byte order * @mask: IPv6 address mask in network byte order * @secid: LSM secid value for entry * * Description: * Add a new address entry into the unlabeled connection hash table using the * interface entry specified by @iface. On success zero is returned, otherwise * a negative value is returned. * */ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, u32 secid) { int ret_val; struct netlbl_unlhsh_addr6 *entry; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; entry->list.addr = *addr; entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; entry->list.mask = *mask; entry->list.valid = 1; entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (ret_val != 0) kfree(entry); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_add_iface - Adds a new interface entry to the hash table * @ifindex: network interface * * Description: * Add a new, empty, interface entry into the unlabeled connection hash table. * On success a pointer to the new interface entry is returned, on failure NULL * is returned. * */ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) { u32 bkt; struct netlbl_unlhsh_iface *iface; iface = kzalloc(sizeof(*iface), GFP_ATOMIC); if (iface == NULL) return NULL; iface->ifindex = ifindex; INIT_LIST_HEAD(&iface->addr4_list); INIT_LIST_HEAD(&iface->addr6_list); iface->valid = 1; spin_lock(&netlbl_unlhsh_lock); if (ifindex > 0) { bkt = netlbl_unlhsh_hash(ifindex); if (netlbl_unlhsh_search_iface(ifindex) != NULL) goto add_iface_failure; list_add_tail_rcu(&iface->list, &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; rcu_assign_pointer(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); return iface; add_iface_failure: spin_unlock(&netlbl_unlhsh_lock); kfree(iface); return NULL; } /** * netlbl_unlhsh_add - Adds a new entry to the unlabeled connection hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @secid: LSM secid value for the entry * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the unlabeled connection hash table. Returns zero on * success, negative values on failure. 
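 * For example (editor's sketch, values hypothetical), mapping all
 * otherwise-unlabeled traffic from 192.168.0.0/16 on "eth0" to a given
 * @secid:
 *
 *	struct in_addr addr = { .s_addr = htonl(0xc0a80000) };
 *	struct in_addr mask = { .s_addr = htonl(0xffff0000) };
 *
 *	netlbl_unlhsh_add(&init_net, "eth0", &addr, &mask,
 *			  sizeof(addr), secid, audit_info);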
* */ int netlbl_unlhsh_add(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, u32 secid, struct netlbl_audit *audit_info) { int ret_val; int ifindex; struct net_device *dev; struct netlbl_unlhsh_iface *iface; struct audit_buffer *audit_buf = NULL; struct lsm_context ctx; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_add_return; } ifindex = dev->ifindex; iface = netlbl_unlhsh_search_iface(ifindex); } else { ifindex = 0; iface = rcu_dereference(netlbl_unlhsh_def); } if (iface == NULL) { iface = netlbl_unlhsh_add_iface(ifindex); if (iface == NULL) { ret_val = -ENOMEM; goto unlhsh_add_return; } } audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD, audit_info); switch (addr_len) { case sizeof(struct in_addr): { const struct in_addr *addr4 = addr; const struct in_addr *mask4 = mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, dev_name, addr4->s_addr, mask4->s_addr); break; } #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): { const struct in6_addr *addr6 = addr; const struct in6_addr *mask6 = mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, dev_name, addr6, mask6); break; } #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); unlhsh_add_return: rcu_read_unlock(); if (audit_buf != NULL) { if (security_secid_to_secctx(secid, &ctx) >= 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); } return ret_val; } /** * netlbl_unlhsh_remove_addr4 - Remove an IPv4 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr4(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in_addr *addr, const struct in_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; struct net_device *dev; struct lsm_context ctx; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr4_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &ctx) >= 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 
1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry * @net: network namespace * @iface: interface entry * @addr: IP address * @mask: IP address mask * @audit_info: NetLabel audit information * * Description: * Remove an IP address entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. * */ static int netlbl_unlhsh_remove_addr6(struct net *net, struct netlbl_unlhsh_iface *iface, const struct in6_addr *addr, const struct in6_addr *mask, struct netlbl_audit *audit_info) { struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; struct net_device *dev; struct lsm_context ctx; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); if (list_entry != NULL) entry = netlbl_unlhsh_addr6_entry(list_entry); else entry = NULL; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr, mask); dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &ctx) >= 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); } if (entry == NULL) return -ENOENT; kfree_rcu(entry, rcu); return 0; } #endif /* IPv6 */ /** * netlbl_unlhsh_condremove_iface - Remove an interface entry * @iface: the interface entry * * Description: * Remove an interface entry from the unlabeled connection hash table if it is * empty. An interface entry is considered to be empty if there are no * address entries assigned to it. * */ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { struct netlbl_af4list *iter4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) goto unlhsh_condremove_failure; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) goto unlhsh_condremove_failure; #endif /* IPv6 */ iface->valid = 0; if (iface->ifindex > 0) list_del_rcu(&iface->list); else RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return; unlhsh_condremove_failure: spin_unlock(&netlbl_unlhsh_lock); } /** * netlbl_unlhsh_remove - Remove an entry from the unlabeled hash table * @net: network namespace * @dev_name: interface name * @addr: IP address in network byte order * @mask: address mask in network byte order * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) * @audit_info: NetLabel audit information * * Description: * Removes and existing entry from the unlabeled connection hash table. * Returns zero on success, negative values on failure. 
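 * If removing the address leaves the interface entry with no address
 * entries at all, the interface entry is removed as well.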
* */ int netlbl_unlhsh_remove(struct net *net, const char *dev_name, const void *addr, const void *mask, u32 addr_len, struct netlbl_audit *audit_info) { int ret_val; struct net_device *dev; struct netlbl_unlhsh_iface *iface; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) return -EINVAL; rcu_read_lock(); if (dev_name != NULL) { dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_remove_return; } iface = netlbl_unlhsh_search_iface(dev->ifindex); } else iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL) { ret_val = -ENOENT; goto unlhsh_remove_return; } switch (addr_len) { case sizeof(struct in_addr): ret_val = netlbl_unlhsh_remove_addr4(net, iface, addr, mask, audit_info); break; #if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): ret_val = netlbl_unlhsh_remove_addr6(net, iface, addr, mask, audit_info); break; #endif /* IPv6 */ default: ret_val = -EINVAL; } if (ret_val == 0) { netlbl_unlhsh_condremove_iface(iface); atomic_dec(&netlabel_mgmt_protocount); } unlhsh_remove_return: rcu_read_unlock(); return ret_val; } /* * General Helper Functions */ /** * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a * network device going away. In the case of a device going away we clear any * related entries from the unlabeled connection hash table. * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? */ if (event == NETDEV_DOWN) { spin_lock(&netlbl_unlhsh_lock); iface = netlbl_unlhsh_search_iface(dev->ifindex); if (iface != NULL && iface->valid) { iface->valid = 0; list_del_rcu(&iface->list); } else iface = NULL; spin_unlock(&netlbl_unlhsh_lock); } if (iface != NULL) call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); return NOTIFY_DONE; } /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ static void netlbl_unlabel_acceptflg_set(u8 value, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; u8 old_val; old_val = netlabel_unlabel_acceptflg; netlabel_unlabel_acceptflg = value; audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); audit_log_end(audit_buf); } } /** * netlbl_unlabel_addrinfo_get - Get the IPv4/6 address information * @info: the Generic NETLINK info block * @addr: the IP address * @mask: the IP address mask * @len: the address length * * Description: * Examine the Generic NETLINK message and extract the IP address information. * Returns zero on success, negative values on failure. 
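 * The returned @addr and @mask point directly into the NETLINK
 * attribute payloads; nothing is copied.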
* */ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, void **addr, void **mask, u32 *len) { u32 addr_len; if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]); return 0; } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); if (addr_len != sizeof(struct in6_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK])) return -EINVAL; *len = addr_len; *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]); return 0; } return -EINVAL; } /* * NetLabel Command Handlers */ /** * netlbl_unlabel_accept - Handle an ACCEPT message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated ACCEPT message and set the accept flag accordingly. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { netlbl_netlink_auditinfo(&audit_info); netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } return -EINVAL; } /** * netlbl_unlabel_list - Handle a LIST message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated LIST message and respond with the current status. * Returns zero on success, negative values on failure. * */ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; struct sk_buff *ans_skb; void *data; ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) goto list_failure; data = genlmsg_put_reply(ans_skb, info, &netlbl_unlabel_gnl_family, 0, NLBL_UNLABEL_C_LIST); if (data == NULL) { ret_val = -ENOMEM; goto list_failure; } ret_val = nla_put_u8(ans_skb, NLBL_UNLABEL_A_ACPTFLG, netlabel_unlabel_acceptflg); if (ret_val != 0) goto list_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); return ret_val; } /** * netlbl_unlabel_staticadd - Handle a STATICADD message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADD message and add a new unlabeled * connection entry to the hash table. Returns zero on success, negative * values on failure. * */ static int netlbl_unlabel_staticadd(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. 
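 * The XOR in the test below enforces this: exactly one complete
 * address/mask pair, IPv4 or IPv6, may be supplied.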
*/ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, dev_name, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticadddef - Handle a STATICADDDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICADDDEF message and add a new default * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticadddef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; u32 secid; struct netlbl_audit audit_info; /* Don't allow users to add both IPv4 and IPv6 addresses for a * single entry. However, allow users to create two entries, one each * for IPv4 and IPv6, with the same LSM security context which should * achieve the same result. */ if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; ret_val = security_secctx_to_secid( nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), &secid); if (ret_val != 0) return ret_val; return netlbl_unlhsh_add(&init_net, NULL, addr, mask, addr_len, secid, &audit_info); } /** * netlbl_unlabel_staticremove - Handle a STATICREMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVE message and remove the specified * unlabeled connection entry. Returns zero on success, negative values on * failure. * */ static int netlbl_unlabel_staticremove(struct sk_buff *skb, struct genl_info *info) { int ret_val; char *dev_name; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!info->attrs[NLBL_UNLABEL_A_IFACE] || !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); return netlbl_unlhsh_remove(&init_net, dev_name, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticremovedef - Handle a STATICREMOVEDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated STATICREMOVEDEF message and remove the default * unlabeled connection entry. Returns zero on success, negative values on * failure. 
* */ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb, struct genl_info *info) { int ret_val; void *addr; void *mask; u32 addr_len; struct netlbl_audit audit_info; /* See the note in netlbl_unlabel_staticadd() about not allowing both * IPv4 and IPv6 in the same entry. */ if (!((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) return ret_val; return netlbl_unlhsh_remove(&init_net, NULL, addr, mask, addr_len, &audit_info); } /** * netlbl_unlabel_staticlist_gen - Generate messages for STATICLIST[DEF] * @cmd: command/message * @iface: the interface entry * @addr4: the IPv4 address entry * @addr6: the IPv6 address entry * @arg: the netlbl_unlhsh_walk_arg structure * * Description: * This function is designed to be used to generate a response for a * STATICLIST or STATICLISTDEF message. When called either @addr4 or @addr6 * can be specified, not both, the other unspecified entry should be set to * NULL by the caller. Returns the size of the message on success, negative * values on failure. * */ static int netlbl_unlabel_staticlist_gen(u32 cmd, const struct netlbl_unlhsh_iface *iface, const struct netlbl_unlhsh_addr4 *addr4, const struct netlbl_unlhsh_addr6 *addr6, void *arg) { int ret_val = -ENOMEM; struct netlbl_unlhsh_walk_arg *cb_arg = arg; struct net_device *dev; struct lsm_context ctx; void *data; u32 secid; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) goto list_cb_failure; if (iface->ifindex > 0) { dev = dev_get_by_index(&init_net, iface->ifindex); if (!dev) { ret_val = -ENODEV; goto list_cb_failure; } ret_val = nla_put_string(cb_arg->skb, NLBL_UNLABEL_A_IFACE, dev->name); dev_put(dev); if (ret_val != 0) goto list_cb_failure; } if (addr4) { struct in_addr addr_struct; addr_struct.s_addr = addr4->list.addr; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4ADDR, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; addr_struct.s_addr = addr4->list.mask; ret_val = nla_put_in_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV4MASK, addr_struct.s_addr); if (ret_val != 0) goto list_cb_failure; secid = addr4->secid; } else { ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6ADDR, &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put_in6_addr(cb_arg->skb, NLBL_UNLABEL_A_IPV6MASK, &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; secid = addr6->secid; } ret_val = security_secid_to_secctx(secid, &ctx); if (ret_val < 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_SECCTX, ctx.len, ctx.context); security_release_secctx(&ctx); if (ret_val != 0) goto list_cb_failure; cb_arg->seq++; genlmsg_end(cb_arg->skb, data); return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_unlabel_staticlist - Handle a STATICLIST message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLIST message and dump the unlabeled * connection hash table in a form suitable for use in a kernel generated * STATICLIST message. Returns the length of @skb. 
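 * The bucket, chain, and address counters stashed in cb->args[] allow
 * the dump to resume where the previous buffer filled up.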
* */ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || iter_chain++ < skip_chain) continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr4 = 0; skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; iter_chain--; goto unlabel_staticlist_return; } } iter_addr6 = 0; skip_addr6 = 0; #endif /* IPv6 */ } iter_chain = 0; skip_chain = 0; } unlabel_staticlist_return: rcu_read_unlock(); cb->args[0] = iter_bkt; cb->args[1] = iter_chain; cb->args[2] = iter_addr4; cb->args[3] = iter_addr6; return skb->len; } /** * netlbl_unlabel_staticlistdef - Handle a STATICLISTDEF message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated STATICLISTDEF message and dump the default * unlabeled connection entry in a form suitable for use in a kernel generated * STATICLISTDEF message. Returns the length of @skb. 
* */ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_unlhsh_walk_arg cb_arg; struct netlbl_unlhsh_iface *iface; u32 iter_addr4 = 0, iter_addr6 = 0; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *addr6; #endif cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL || !iface->valid) goto unlabel_staticlistdef_return; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { if (iter_addr4++ < cb->args[0]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, netlbl_unlhsh_addr4_entry(addr4), NULL, &cb_arg) < 0) { iter_addr4--; goto unlabel_staticlistdef_return; } } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < cb->args[1]) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, NULL, netlbl_unlhsh_addr6_entry(addr6), &cb_arg) < 0) { iter_addr6--; goto unlabel_staticlistdef_return; } } #endif /* IPv6 */ unlabel_staticlistdef_return: rcu_read_unlock(); cb->args[0] = iter_addr4; cb->args[1] = iter_addr6; return skb->len; } /* * NetLabel Generic NETLINK Command Definitions */ static const struct genl_small_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_unlabel_list, .dumpit = NULL, }, }; static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .small_ops = netlbl_unlabel_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), .resv_start_op = NLBL_UNLABEL_C_STATICLISTDEF + 1, }; /* * NetLabel Generic NETLINK Protocol Functions */ /** * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component * * Description: * Register the unlabeled packet NetLabel component with the Generic NETLINK * mechanism. Returns zero on success, negative values on failure. 
 *
 */
int __init netlbl_unlabel_genl_init(void)
{
	return genl_register_family(&netlbl_unlabel_gnl_family);
}

/*
 * NetLabel KAPI Hooks
 */

static struct notifier_block netlbl_unlhsh_netdev_notifier = {
	.notifier_call = netlbl_unlhsh_netdev_handler,
};

/**
 * netlbl_unlabel_init - Initialize the unlabeled connection hash table
 * @size: the number of bits to use for the hash buckets
 *
 * Description:
 * Initializes the unlabeled connection hash table and registers a network
 * device notification handler.  This function should only be called by the
 * NetLabel subsystem itself during initialization.  Returns zero on success,
 * non-zero values on error.
 *
 */
int __init netlbl_unlabel_init(u32 size)
{
	u32 iter;
	struct netlbl_unlhsh_tbl *hsh_tbl;

	if (size == 0)
		return -EINVAL;

	hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
	if (hsh_tbl == NULL)
		return -ENOMEM;
	hsh_tbl->size = 1 << size;
	hsh_tbl->tbl = kcalloc(hsh_tbl->size,
			       sizeof(struct list_head),
			       GFP_KERNEL);
	if (hsh_tbl->tbl == NULL) {
		kfree(hsh_tbl);
		return -ENOMEM;
	}
	for (iter = 0; iter < hsh_tbl->size; iter++)
		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);

	spin_lock(&netlbl_unlhsh_lock);
	rcu_assign_pointer(netlbl_unlhsh, hsh_tbl);
	spin_unlock(&netlbl_unlhsh_lock);

	register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier);

	return 0;
}

/**
 * netlbl_unlabel_getattr - Get the security attributes for an unlabeled packet
 * @skb: the packet
 * @family: protocol family
 * @secattr: the security attributes
 *
 * Description:
 * Determine the security attributes, if any, for an unlabeled packet and
 * return them in @secattr.  Returns zero on success and negative values on
 * failure.
 *
 */
int netlbl_unlabel_getattr(const struct sk_buff *skb,
			   u16 family,
			   struct netlbl_lsm_secattr *secattr)
{
	struct netlbl_unlhsh_iface *iface;

	rcu_read_lock();
	iface = netlbl_unlhsh_search_iface(skb->skb_iif);
	if (iface == NULL)
		iface = rcu_dereference(netlbl_unlhsh_def);
	if (iface == NULL || !iface->valid)
		goto unlabel_getattr_nolabel;

#if IS_ENABLED(CONFIG_IPV6)
	/* When resolving a fallback label, check the sk_buff version as
	 * it is possible (e.g. SCTP) to have family = PF_INET6 while
	 * receiving ip_hdr(skb)->version = 4.
	 */
	if (family == PF_INET6 && ip_hdr(skb)->version == 4)
		family = PF_INET;
#endif /* IPv6 */

	switch (family) {
	case PF_INET: {
		struct iphdr *hdr4;
		struct netlbl_af4list *addr4;

		hdr4 = ip_hdr(skb);
		addr4 = netlbl_af4list_search(hdr4->saddr,
					      &iface->addr4_list);
		if (addr4 == NULL)
			goto unlabel_getattr_nolabel;
		secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case PF_INET6: {
		struct ipv6hdr *hdr6;
		struct netlbl_af6list *addr6;

		hdr6 = ipv6_hdr(skb);
		addr6 = netlbl_af6list_search(&hdr6->saddr,
					      &iface->addr6_list);
		if (addr6 == NULL)
			goto unlabel_getattr_nolabel;
		secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid;
		break;
	}
#endif /* IPv6 */
	default:
		goto unlabel_getattr_nolabel;
	}
	rcu_read_unlock();

	secattr->flags |= NETLBL_SECATTR_SECID;
	secattr->type = NETLBL_NLTYPE_UNLABELED;
	return 0;

unlabel_getattr_nolabel:
	rcu_read_unlock();
	if (netlabel_unlabel_acceptflg == 0)
		return -ENOMSG;
	secattr->type = NETLBL_NLTYPE_UNLABELED;
	return 0;
}
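/*
 * Editor's sketch (not part of the original file): netlbl_unlabel_init() and
 * netlbl_unlabel_getattr() above follow the classic RCU publish/subscribe
 * pattern - the table is fully initialized before rcu_assign_pointer()
 * publishes it, and readers only touch it under rcu_read_lock().  The
 * hypothetical names below (ex_tbl, ex_node, ex_lookup) are illustrative
 * only; they restate the pattern, not the NetLabel code itself.
 */
struct ex_node {
	u32 key;
	struct list_head list;
};

struct ex_tbl {
	u32 size;			/* always a power of two */
	struct list_head *tbl;
};

static struct ex_tbl __rcu *ex_table;
static DEFINE_SPINLOCK(ex_table_lock);

static int __init ex_init(u32 bits)
{
	struct ex_tbl *t;
	u32 i;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return -ENOMEM;
	t->size = 1 << bits;
	t->tbl = kcalloc(t->size, sizeof(struct list_head), GFP_KERNEL);
	if (!t->tbl) {
		kfree(t);
		return -ENOMEM;
	}
	for (i = 0; i < t->size; i++)
		INIT_LIST_HEAD(&t->tbl[i]);

	/* Publish only after every bucket is valid. */
	spin_lock(&ex_table_lock);
	rcu_assign_pointer(ex_table, t);
	spin_unlock(&ex_table_lock);
	return 0;
}

/* Caller must hold rcu_read_lock(), as netlbl_unlabel_getattr() does. */
static struct ex_node *ex_lookup(u32 key)
{
	struct ex_tbl *t = rcu_dereference(ex_table);
	struct ex_node *n;

	list_for_each_entry_rcu(n, &t->tbl[key & (t->size - 1)], list)
		if (n->key == key)
			return n;
	return NULL;
}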
/**
 * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
 *
 * Description:
 * Set the default NetLabel configuration to allow incoming unlabeled packets
 * and to send unlabeled network traffic by default.
 *
 */
int __init netlbl_unlabel_defconf(void)
{
	int ret_val;
	struct netlbl_dom_map *entry;
	struct netlbl_audit audit_info;

	/* Only the kernel is allowed to call this function and the only time
	 * it is called is at bootup before the audit subsystem is reporting
	 * messages so don't worry too much about these values. */
	security_current_getlsmprop_subj(&audit_info.prop);
	audit_info.loginuid = GLOBAL_ROOT_UID;
	audit_info.sessionid = 0;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL)
		return -ENOMEM;
	entry->family = AF_UNSPEC;
	entry->def.type = NETLBL_NLTYPE_UNLABELED;
	ret_val = netlbl_domhsh_add_default(entry, &audit_info);
	if (ret_val != 0)
		return ret_val;

	netlbl_unlabel_acceptflg_set(1, &audit_info);

	return 0;
}
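/*
 * Editor's sketch (not part of the original file): the STATICADD and
 * STATICREMOVE[DEF] handlers earlier in this file validate that exactly one
 * complete address/mask pair - IPv4 or IPv6, never both - is present, using
 * an XOR of two "pair is incomplete" tests.  The helper below restates that
 * check over plain booleans; have_v4_*/have_v6_* stand in for the
 * info->attrs[] presence tests.
 */
static bool ex_one_family_present(bool have_v4_addr, bool have_v4_mask,
				  bool have_v6_addr, bool have_v6_mask)
{
	bool v4_incomplete = !have_v4_addr || !have_v4_mask;
	bool v6_incomplete = !have_v6_addr || !have_v6_mask;

	/*
	 * Exactly one side may be complete: if both are complete or both
	 * are incomplete the XOR is false and the request is rejected
	 * with -EINVAL, matching the handlers above.
	 */
	return v4_incomplete ^ v6_incomplete;
}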
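/*
 * Editor's sketch (not part of either file): netlbl_unlabel_staticlist()
 * above resumes interrupted netlink dumps by stashing per-level cursors in
 * cb->args[].  The skeleton below shows the same skip/iterate bookkeeping
 * for a single-level list; ex_item, ex_list and ex_emit are hypothetical.
 */
struct ex_item {
	struct list_head list;
};

static LIST_HEAD(ex_list);
static int ex_emit(struct sk_buff *skb, struct netlink_callback *cb,
		   struct ex_item *item);

static int ex_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	u32 skip = cb->args[0];		/* entries already sent */
	u32 iter = 0;
	struct ex_item *item;

	rcu_read_lock();
	list_for_each_entry_rcu(item, &ex_list, list) {
		if (iter++ < skip)
			continue;	/* emitted in a previous pass */
		if (ex_emit(skb, cb, item) < 0) {
			/* skb full: back up so this entry is retried */
			iter--;
			break;
		}
	}
	rcu_read_unlock();

	cb->args[0] = iter;		/* resume point for the next call */
	return skb->len;		/* non-zero means "call me again" */
}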
// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS segment usage file.
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Koji Sato.
 * Revised by Ryusuke Konishi.
*/ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/errno.h> #include "mdt.h" #include "sufile.h" #include <trace/events/nilfs2.h> /** * struct nilfs_sufile_info - on-memory private data of sufile * @mi: on-memory private data of metadata file * @ncleansegs: number of clean segments * @allocmin: lower limit of allocatable segment range * @allocmax: upper limit of allocatable segment range */ struct nilfs_sufile_info { struct nilfs_mdt_info mi; unsigned long ncleansegs;/* number of clean segments */ __u64 allocmin; /* lower limit of allocatable segment range */ __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) { return (struct nilfs_sufile_info *)NILFS_MDT(sufile); } static inline unsigned long nilfs_sufile_segment_usages_per_block(const struct inode *sufile) { return NILFS_MDT(sufile)->mi_entries_per_block; } static unsigned long nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile)); return (unsigned long)t; } static unsigned long nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); } static unsigned long nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, __u64 max) { return min_t(unsigned long, nilfs_sufile_segment_usages_per_block(sufile) - nilfs_sufile_get_offset(sufile, curr), max - curr + 1); } /** * nilfs_sufile_segment_usage_offset - calculate the byte offset of a segment * usage entry in the folio containing it * @sufile: segment usage file inode * @segnum: number of segment usage * @bh: buffer head of block containing segment usage indexed by @segnum * * Return: Byte offset in the folio of the segment usage entry. */ static size_t nilfs_sufile_segment_usage_offset(const struct inode *sufile, __u64 segnum, struct buffer_head *bh) { return offset_in_folio(bh->b_folio, bh->b_data) + nilfs_sufile_get_offset(sufile, segnum) * NILFS_MDT(sufile)->mi_entry_size; } static int nilfs_sufile_get_header_block(struct inode *sufile, struct buffer_head **bhp) { int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); if (unlikely(err == -ENOENT)) { nilfs_error(sufile->i_sb, "missing header block in segment usage metadata"); err = -EIO; } return err; } static inline int nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, int create, struct buffer_head **bhp) { return nilfs_mdt_get_block(sufile, nilfs_sufile_get_blkoff(sufile, segnum), create, NULL, bhp); } static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile, __u64 segnum) { return nilfs_mdt_delete_block(sufile, nilfs_sufile_get_blkoff(sufile, segnum)); } static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); kunmap_local(header); mark_buffer_dirty(header_bh); } /** * nilfs_sufile_get_ncleansegs - return the number of clean segments * @sufile: inode of segment usage file * * Return: Number of clean segments. 
*/ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) { return NILFS_SUI(sufile)->ncleansegs; } /** * nilfs_sufile_updatev - modify multiple segment usages at a time * @sufile: inode of segment usage file * @segnumv: array of segment numbers * @nsegs: size of @segnumv array * @create: creation flag * @ndone: place to store number of modified segments on @segnumv * @dofunc: primitive operation for the update * * Description: nilfs_sufile_updatev() repeatedly calls @dofunc * against the given array of segments. The @dofunc is called with * buffers of a header block and the sufile block in which the target * segment usage entry is contained. If @ndone is given, the number * of successfully modified segments from the head is stored in the * place @ndone points to. * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EINVAL - Invalid segment usage number * * %-EIO - I/O error (including metadata corruption). * * %-ENOENT - Given segment usage is in hole block (may be returned if * @create is zero) * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, int create, size_t *ndone, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)) { struct buffer_head *header_bh, *bh; unsigned long blkoff, prev_blkoff; __u64 *seg; size_t nerr = 0, n = 0; int ret = 0; if (unlikely(nsegs == 0)) goto out; down_write(&NILFS_MDT(sufile)->mi_sem); for (seg = segnumv; seg < segnumv + nsegs; seg++) { if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", __func__, (unsigned long long)*seg); nerr++; } } if (nerr > 0) { ret = -EINVAL; goto out_sem; } ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; seg = segnumv; blkoff = nilfs_sufile_get_blkoff(sufile, *seg); ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); if (ret < 0) goto out_header; for (;;) { dofunc(sufile, *seg, header_bh, bh); if (++seg >= segnumv + nsegs) break; prev_blkoff = blkoff; blkoff = nilfs_sufile_get_blkoff(sufile, *seg); if (blkoff == prev_blkoff) continue; /* get different block */ brelse(bh); ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); if (unlikely(ret < 0)) goto out_header; } brelse(bh); out_header: n = seg - segnumv; brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); out: if (ndone) *ndone = n; return ret; } int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)) { struct buffer_head *header_bh, *bh; int ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", __func__, (unsigned long long)segnum); return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); if (!ret) { dofunc(sufile, segnum, header_bh, bh); brelse(bh); } brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_alloc_range - limit range of segment to be allocated * @sufile: inode of segment usage file * @start: minimum segment number of allocatable region (inclusive) * @end: maximum segment number of allocatable region (inclusive) * * Return: 0 on success, or %-ERANGE if segment range is invalid. 
*/ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) { struct nilfs_sufile_info *sui = NILFS_SUI(sufile); __u64 nsegs; int ret = -ERANGE; down_write(&NILFS_MDT(sufile)->mi_sem); nsegs = nilfs_sufile_get_nsegments(sufile); if (start <= end && end < nsegs) { sui->allocmin = start; sui->allocmax = end; ret = 0; } up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file * @segnump: pointer to segment number * * Description: nilfs_sufile_alloc() allocates a clean segment, and stores * its segment number in the place pointed to by @segnump. * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. * * %-ENOSPC - No clean segment left. */ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; size_t offset; void *kaddr; unsigned long nsegments, nsus, cnt; int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; header = kmap_local_folio(header_bh->b_folio, 0); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_local(header); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; segnum = last_alloc + 1; if (segnum < sui->allocmin || segnum > sui->allocmax) segnum = sui->allocmin; for (cnt = 0; cnt < nsegments; cnt += nsus) { if (segnum > maxsegnum) { if (cnt < sui->allocmax - sui->allocmin + 1) { /* * wrap around in the limited region. * if allocation started from * sui->allocmin, this never happens. 
*/ segnum = sui->allocmin; maxsegnum = last_alloc; } else if (segnum > sui->allocmin && sui->allocmax + 1 < nsegments) { segnum = sui->allocmax + 1; maxsegnum = nsegments - 1; } else if (sui->allocmin > 0) { segnum = 0; maxsegnum = sui->allocmin - 1; } else { break; /* never happens */ } } trace_nilfs2_segment_usage_check(sufile, segnum, cnt); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) goto out_header; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kaddr = kmap_local_folio(su_bh->b_folio, offset); nsus = nilfs_sufile_segment_usages_in_block( sufile, segnum, maxsegnum); for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) { if (!nilfs_segment_usage_clean(su)) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); kunmap_local(kaddr); header = kmap_local_folio(header_bh->b_folio, 0); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); kunmap_local(header); sui->ncleansegs--; mark_buffer_dirty(header_bh); mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; trace_nilfs2_segment_usage_allocated(sufile, segnum); goto out_header; } kunmap_local(kaddr); brelse(su_bh); } /* no segments left */ ret = -ENOSPC; out_header: brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; size_t offset; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kmap_local_folio(su_bh->b_folio, offset); if (unlikely(!nilfs_segment_usage_clean(su))) { nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", __func__, (unsigned long long)segnum); kunmap_local(su); return; } nilfs_segment_usage_set_dirty(su); kunmap_local(su); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; size_t offset; int clean, dirty; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kmap_local_folio(su_bh->b_folio, offset); if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { kunmap_local(su); return; } clean = nilfs_segment_usage_clean(su); dirty = nilfs_segment_usage_dirty(su); /* make the segment garbage */ su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); kunmap_local(su); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 
0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; size_t offset; int sudirty; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_clean(su)) { nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", __func__, (unsigned long long)segnum); kunmap_local(su); return; } if (unlikely(nilfs_segment_usage_error(su))) nilfs_warn(sufile->i_sb, "free segment %llu marked in error", (unsigned long long)segnum); sudirty = nilfs_segment_usage_dirty(su); if (unlikely(!sudirty)) nilfs_warn(sufile->i_sb, "free unallocated segment %llu", (unsigned long long)segnum); nilfs_segment_usage_set_clean(su); kunmap_local(su); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); NILFS_SUI(sufile)->ncleansegs++; nilfs_mdt_mark_dirty(sufile); trace_nilfs2_segment_usage_freed(sufile, segnum); } /** * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number * * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; size_t offset; struct nilfs_segment_usage *su; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (unlikely(ret)) { if (ret == -ENOENT) { nilfs_error(sufile->i_sb, "segment usage for segment %llu is unreadable due to a hole block", (unsigned long long)segnum); ret = -EIO; } goto out_sem; } offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); su = kmap_local_folio(bh->b_folio, offset); if (unlikely(nilfs_segment_usage_error(su))) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; kunmap_local(su); brelse(bh); if (nilfs_segment_is_active(nilfs, segnum)) { nilfs_error(sufile->i_sb, "active segment %llu is erroneous", (unsigned long long)segnum); } else { /* * Segments marked erroneous are never allocated by * nilfs_sufile_alloc(); only active segments, ie, * the segments indexed by ns_segnum or ns_nextnum, * can be erroneous here. */ WARN_ON_ONCE(1); } ret = -EIO; } else { nilfs_segment_usage_set_dirty(su); kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); } out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_segment_usage - set usage of a segment * @sufile: inode of segment usage file * @segnum: segment number * @nblocks: number of live blocks in the segment * @modtime: modification time (option) * * Return: 0 on success, or a negative error code on failure. */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; size_t offset; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (ret < 0) goto out_sem; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, bh); su = kmap_local_folio(bh->b_folio, offset); if (modtime) { /* * Check segusage error and set su_lastmod only when updating * this entry with a valid timestamp, not for cancellation. 
*/ WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); } su->su_nblocks = cpu_to_le32(nblocks); kunmap_local(su); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_get_stat - get segment usage statistics * @sufile: inode of segment usage file * @sustat: pointer to a structure of segment usage statistics * * Description: nilfs_sufile_get_stat() retrieves segment usage statistics * and stores them in the location pointed to by @sustat. * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. */ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; int ret; down_read(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; header = kmap_local_folio(header_bh->b_folio, 0); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); sustat->ss_ctime = nilfs->ns_ctime; sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime; spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); kunmap_local(header); brelse(header_bh); out_sem: up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; size_t offset; int suclean; offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kmap_local_folio(su_bh->b_folio, offset); if (nilfs_segment_usage_error(su)) { kunmap_local(su); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); kunmap_local(su); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); NILFS_SUI(sufile)->ncleansegs--; } mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } /** * nilfs_sufile_truncate_range - truncate range of segment array * @sufile: inode of segment usage file * @start: start segment number (inclusive) * @end: end segment number (inclusive) * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EBUSY - Dirty or active segments are present in the range. * * %-EINVAL - Invalid number of segments specified. * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. 
*/ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *header_bh; struct buffer_head *su_bh; struct nilfs_segment_usage *su, *su2; size_t susz = NILFS_MDT(sufile)->mi_entry_size; unsigned long segusages_per_block; unsigned long nsegs, ncleaned; __u64 segnum; size_t offset; ssize_t n, nc; int ret; int j; nsegs = nilfs_sufile_get_nsegments(sufile); ret = -EINVAL; if (start > end || start >= nsegs) goto out; ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out; segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); ncleaned = 0; for (segnum = start; segnum <= end; segnum += n) { n = min_t(unsigned long, segusages_per_block - nilfs_sufile_get_offset(sufile, segnum), end - segnum + 1); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out_header; /* hole */ continue; } offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kmap_local_folio(su_bh->b_folio, offset); su2 = su; for (j = 0; j < n; j++, su = (void *)su + susz) { if ((le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; kunmap_local(su2); brelse(su_bh); goto out_header; } } nc = 0; for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { if (nilfs_segment_usage_error(su)) { nilfs_segment_usage_set_clean(su); nc++; } } kunmap_local(su2); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; } brelse(su_bh); if (n == segusages_per_block) { /* make hole */ nilfs_sufile_delete_segment_usage_block(sufile, segnum); } } ret = 0; out_header: if (ncleaned > 0) { NILFS_SUI(sufile)->ncleansegs += ncleaned; nilfs_sufile_mod_counter(header_bh, ncleaned, 0); nilfs_mdt_mark_dirty(sufile); } brelse(header_bh); out: return ret; } /** * nilfs_sufile_resize - resize segment array * @sufile: inode of segment usage file * @newnsegs: new number of segments * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EBUSY - Dirty or active segments exist in the region to be truncated. * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. * * %-ENOSPC - Enough free space is not left for shrinking. */ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); unsigned long nsegs, nrsvsegs; int ret = 0; down_write(&NILFS_MDT(sufile)->mi_sem); nsegs = nilfs_sufile_get_nsegments(sufile); if (nsegs == newnsegs) goto out; ret = -ENOSPC; nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs); if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs) goto out; ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out; if (newnsegs > nsegs) { sui->ncleansegs += newnsegs - nsegs; } else /* newnsegs < nsegs */ { ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1); if (ret < 0) goto out_header; sui->ncleansegs -= nsegs - newnsegs; /* * If the sufile is successfully truncated, immediately adjust * the segment allocation space while locking the semaphore * "mi_sem" so that nilfs_sufile_alloc() never allocates * segments in the truncated space. 
*/ sui->allocmax = newnsegs - 1; sui->allocmin = 0; } header = kmap_local_folio(header_bh->b_folio, 0); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); kunmap_local(header); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); nilfs_set_nsegments(nilfs, newnsegs); out_header: brelse(header_bh); out: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking * @buf: array of suinfo * @sisz: byte size of suinfo * @nsi: size of suinfo array * * Return: Count of segment usage info items stored in the output buffer on * success, or one of the following negative error codes on failure: * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; size_t offset; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; int ret, i, j; down_read(&NILFS_MDT(sufile)->mi_sem); segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); nsegs = min_t(unsigned long, nilfs_sufile_get_nsegments(sufile) - segnum, nsi); for (i = 0; i < nsegs; i += n, segnum += n) { n = min_t(unsigned long, segusages_per_block - nilfs_sufile_get_offset(sufile, segnum), nsegs - i); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out; /* hole */ memset(si, 0, sisz * n); si = (void *)si + sisz * n; continue; } offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (j = 0; j < n; j++, su = (void *)su + susz, si = (void *)si + sisz) { si->sui_lastmod = le64_to_cpu(su->su_lastmod); si->sui_nblocks = le32_to_cpu(su->su_nblocks); si->sui_flags = le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) si->sui_flags |= BIT(NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_local(kaddr); brelse(su_bh); } ret = nsegs; out: up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_suinfo - sets segment usage info * @sufile: inode of segment usage file * @buf: array of suinfo_update * @supsz: byte size of suinfo_update * @nsup: size of suinfo_update array * * Description: Takes an array of nilfs_suinfo_update structs and updates * segment usage accordingly. Only the fields indicated by the sup_flags * are updated. * * Return: 0 on success, or one of the following negative error codes on * failure: * * %-EINVAL - Invalid values in input (segment number, flags or nblocks). * * %-EIO - I/O error (including metadata corruption). * * %-ENOMEM - Insufficient memory available. 
 */
ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
				unsigned int supsz, size_t nsup)
{
	struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
	struct buffer_head *header_bh, *bh;
	struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup;
	struct nilfs_segment_usage *su;
	size_t offset;
	unsigned long blkoff, prev_blkoff;
	int cleansi, cleansu, dirtysi, dirtysu;
	long ncleaned = 0, ndirtied = 0;
	int ret = 0;

	if (unlikely(nsup == 0))
		return ret;

	for (sup = buf; sup < supend; sup = (void *)sup + supsz) {
		if (sup->sup_segnum >= nilfs->ns_nsegments ||
		    (sup->sup_flags &
		     (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) ||
		    (nilfs_suinfo_update_nblocks(sup) &&
		     sup->sup_sui.sui_nblocks >
		     nilfs->ns_blocks_per_segment))
			return -EINVAL;
	}

	down_write(&NILFS_MDT(sufile)->mi_sem);

	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
	if (ret < 0)
		goto out_sem;

	sup = buf;
	blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum);
	ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh);
	if (ret < 0)
		goto out_header;

	for (;;) {
		offset = nilfs_sufile_segment_usage_offset(
			sufile, sup->sup_segnum, bh);
		su = kmap_local_folio(bh->b_folio, offset);

		if (nilfs_suinfo_update_lastmod(sup))
			su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod);

		if (nilfs_suinfo_update_nblocks(sup))
			su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks);

		if (nilfs_suinfo_update_flags(sup)) {
			/*
			 * Active flag is a virtual flag projected by running
			 * nilfs kernel code - drop it not to write it to
			 * disk.
			 */
			sup->sup_sui.sui_flags &=
					~BIT(NILFS_SEGMENT_USAGE_ACTIVE);

			cleansi = nilfs_suinfo_clean(&sup->sup_sui);
			cleansu = nilfs_segment_usage_clean(su);
			dirtysi = nilfs_suinfo_dirty(&sup->sup_sui);
			dirtysu = nilfs_segment_usage_dirty(su);

			if (cleansi && !cleansu)
				++ncleaned;
			else if (!cleansi && cleansu)
				--ncleaned;

			if (dirtysi && !dirtysu)
				++ndirtied;
			else if (!dirtysi && dirtysu)
				--ndirtied;

			su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags);
		}

		kunmap_local(su);

		sup = (void *)sup + supsz;
		if (sup >= supend)
			break;

		prev_blkoff = blkoff;
		blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum);
		if (blkoff == prev_blkoff)
			continue;

		/* get different block */
		mark_buffer_dirty(bh);
		put_bh(bh);
		ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh);
		if (unlikely(ret < 0))
			goto out_mark;
	}
	mark_buffer_dirty(bh);
	put_bh(bh);

 out_mark:
	if (ncleaned || ndirtied) {
		nilfs_sufile_mod_counter(header_bh, (u64)ncleaned,
					 (u64)ndirtied);
		NILFS_SUI(sufile)->ncleansegs += ncleaned;
	}
	nilfs_mdt_mark_dirty(sufile);
 out_header:
	put_bh(header_bh);
 out_sem:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}

/**
 * nilfs_sufile_trim_fs() - trim ioctl handler function
 * @sufile: inode of segment usage file
 * @range: fstrim_range structure
 *
 * start: First Byte to trim
 * len: number of Bytes to trim from start
 * minlen: minimum extent length in Bytes
 *
 * Description: nilfs_sufile_trim_fs goes through all segments containing
 * bytes from start to start+len.  start is rounded up to the next block
 * boundary and start+len is rounded down.  For each clean segment the
 * blkdev_issue_discard function is invoked.
 *
 * Return: 0 on success, or a negative error code on failure.
*/ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *su_bh; struct nilfs_segment_usage *su; size_t offset; void *kaddr; size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; sector_t seg_start, seg_end, start_block, end_block; sector_t start = 0, nblocks = 0; u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; int ret = 0; unsigned int sects_per_block; sects_per_block = (1 << nilfs->ns_blocksize_bits) / bdev_logical_block_size(nilfs->ns_bdev); len = range->len >> nilfs->ns_blocksize_bits; minlen = range->minlen >> nilfs->ns_blocksize_bits; max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) return -EINVAL; start_block = (range->start + nilfs->ns_blocksize - 1) >> nilfs->ns_blocksize_bits; /* * range->len can be very large (actually, it is set to * ULLONG_MAX by default) - truncate upper end of the range * carefully so as not to overflow. */ if (max_blocks - start_block < len) end_block = max_blocks - 1; else end_block = start_block + len - 1; segnum = nilfs_get_segnum_of_block(nilfs, start_block); segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); down_read(&NILFS_MDT(sufile)->mi_sem); while (segnum <= segnum_end) { n = nilfs_sufile_segment_usages_in_block(sufile, segnum, segnum_end); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out_sem; /* hole */ segnum += n; continue; } offset = nilfs_sufile_segment_usage_offset(sufile, segnum, su_bh); su = kaddr = kmap_local_folio(su_bh->b_folio, offset); for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { if (!nilfs_segment_usage_clean(su)) continue; nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); if (!nblocks) { /* start new extent */ start = seg_start; nblocks = seg_end - seg_start + 1; continue; } if (start + nblocks == seg_start) { /* add to previous extent */ nblocks += seg_end - seg_start + 1; continue; } /* discard previous extent */ if (start < start_block) { nblocks -= start_block - start; start = start_block; } if (nblocks >= minlen) { kunmap_local(kaddr); ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS); if (ret < 0) { put_bh(su_bh); goto out_sem; } ndiscarded += nblocks; offset = nilfs_sufile_segment_usage_offset( sufile, segnum, su_bh); su = kaddr = kmap_local_folio(su_bh->b_folio, offset); } /* start new extent */ start = seg_start; nblocks = seg_end - seg_start + 1; } kunmap_local(kaddr); put_bh(su_bh); } if (nblocks) { /* discard last extent */ if (start < start_block) { nblocks -= start_block - start; start = start_block; } if (start + nblocks > end_block + 1) nblocks = end_block - start + 1; if (nblocks >= minlen) { ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS); if (!ret) ndiscarded += nblocks; } } out_sem: up_read(&NILFS_MDT(sufile)->mi_sem); range->len = ndiscarded << nilfs->ns_blocksize_bits; return ret; } /** * nilfs_sufile_read - read or get sufile inode * @sb: super block instance * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode * @inodep: buffer to store the inode * * Return: 0 on success, or a negative error code on failure. 
*/ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep) { struct inode *sufile; struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; int err; if (susize > sb->s_blocksize) { nilfs_err(sb, "too large segment usage size: %zu bytes", susize); return -EINVAL; } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { nilfs_err(sb, "too small segment usage size: %zu bytes", susize); return -EINVAL; } sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); if (unlikely(!sufile)) return -ENOMEM; if (!(sufile->i_state & I_NEW)) goto out; err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); if (err) goto failed; nilfs_mdt_set_entry_size(sufile, susize, sizeof(struct nilfs_sufile_header)); err = nilfs_read_inode_common(sufile, raw_inode); if (err) goto failed; err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); if (unlikely(err)) { if (err == -ENOENT) { nilfs_err(sb, "missing header block in segment usage metadata"); err = -EINVAL; } goto failed; } sui = NILFS_SUI(sufile); header = kmap_local_folio(header_bh->b_folio, 0); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); kunmap_local(header); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; sui->allocmin = 0; unlock_new_inode(sufile); out: *inodep = sufile; return 0; failed: iget_failed(sufile); return err; }
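/*
 * Editor's sketch (not part of the original file): sufile addresses a
 * segment usage entry by splitting (segnum + first_entry_offset) into a
 * block index (the quotient) and an in-block slot (the remainder), as
 * nilfs_sufile_get_blkoff()/_get_offset() do above.  The standalone helpers
 * below restate that math; entries_per_block and first_offset are stand-ins
 * for the mdt fields.  Example: with 16 entries per block and a first
 * offset of 1, segment 31 lives in block (31 + 1) / 16 = 2, slot 0.
 */
static inline unsigned long ex_su_blkoff(u64 segnum, u64 first_offset,
					 unsigned long entries_per_block)
{
	u64 t = segnum + first_offset;

	return div64_ul(t, entries_per_block);	/* which metadata block */
}

static inline unsigned long ex_su_slot(u64 segnum, u64 first_offset,
				       unsigned long entries_per_block)
{
	u64 t = segnum + first_offset;

	return do_div(t, entries_per_block);	/* slot within that block */
}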
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012-2013 Red Hat, Inc.
 * All rights reserved.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_quota.h"
#include "xfs_symlink.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_symlink_remote.h"
#include "xfs_parent.h"
#include "xfs_defer.h"

int
xfs_readlink(
	struct xfs_inode	*ip,
	char			*link)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsize_t		pathlen;
	int			error;

	trace_xfs_readlink(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;
	if (xfs_ifork_zapped(ip, XFS_DATA_FORK))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_SHARED);

	pathlen = ip->i_disk_size;
	if (!pathlen)
		goto out_corrupt;

	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
			  __func__, (unsigned long long) ip->i_ino,
			  (long long) pathlen);
		ASSERT(0);
		goto out_corrupt;
	}

	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
		/*
		 * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED
		 * if if_data is junk.
		 */
		if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_data))
			goto out_corrupt;

		memcpy(link, ip->i_df.if_data, pathlen + 1);
		error = 0;
	} else {
		error = xfs_symlink_remote_read(ip, link);
	}

	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;

 out_corrupt:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
	return -EFSCORRUPTED;
}
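/*
 * Editor's sketch (not part of the original file): xfs_readlink() above
 * dispatches on the data fork format - short targets live inline in the
 * inode (XFS_DINODE_FMT_LOCAL) and are memcpy()d out, longer ones live in
 * allocated extents and go through xfs_symlink_remote_read().  Restated as
 * a hypothetical helper (ex_copy_link_target is not a real XFS function):
 */
static int ex_copy_link_target(struct xfs_inode *ip, char *link,
			       xfs_fsize_t pathlen)
{
	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
		/* inline: target bytes (and the NUL) sit in the data fork */
		memcpy(link, ip->i_df.if_data, pathlen + 1);
		return 0;
	}
	/* remote: target spans one or two allocated blocks on disk */
	return xfs_symlink_remote_read(ip, link);
}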
int
xfs_symlink(
	struct mnt_idmap	*idmap,
	struct xfs_inode	*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_icreate_args	args = {
		.idmap		= idmap,
		.pip		= dp,
		.mode		= S_IFLNK | (mode & ~S_IFMT),
	};
	struct xfs_dir_update	du = {
		.dp		= dp,
		.name		= link_name,
	};
	struct xfs_trans	*tp = NULL;
	int			error = 0;
	int			pathlen;
	bool			unlock_dp_on_error = false;
	xfs_filblks_t		fs_blocks;
	struct xfs_dquot	*udqp;
	struct xfs_dquot	*gdqp;
	struct xfs_dquot	*pdqp;
	uint			resblks;
	xfs_ino_t		ino;

	*ipp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (xfs_is_shutdown(mp))
		return -EIO;

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= XFS_SYMLINK_MAXLEN)      /* total string too long */
		return -ENAMETOOLONG;
	ASSERT(pathlen > 0);

	/* Make sure that we have allocated dquot(s) on disk. */
	error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
	if (error)
		return error;

	/*
	 * The symlink will fit into the inode data fork?
	 * If there are no parent pointers, then there won't be any attributes.
	 * So we get the whole variable part, and do not need to reserve extra
	 * blocks.  Otherwise, we need to reserve the blocks.
	 */
	if (pathlen <= XFS_LITINO(mp) && !xfs_has_parent(mp))
		fs_blocks = 0;
	else
		fs_blocks = xfs_symlink_blocks(mp, pathlen);
	resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks);

	error = xfs_parent_start(mp, &du.ppargs);
	if (error)
		goto out_release_dquots;

	error = xfs_trans_alloc_icreate(mp, &M_RES(mp)->tr_symlink, udqp, gdqp,
			pdqp, resblks, &tp);
	if (error)
		goto out_parent;

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = true;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_diflags & XFS_DIFLAG_NOSYMLINKS) {
		error = -EPERM;
		goto out_trans_cancel;
	}

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dialloc(&tp, &args, &ino);
	if (!error)
		error = xfs_icreate(tp, ino, &args, &du.ip);
	if (error)
		goto out_trans_cancel;

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin(tp, dp, 0);

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);

	resblks -= XFS_IALLOC_SPACE_RES(mp);
	error = xfs_symlink_write_target(tp, du.ip, du.ip->i_ino, target_path,
			pathlen, fs_blocks, resblks);
	if (error)
		goto out_trans_cancel;
	resblks -= fs_blocks;
	i_size_write(VFS_I(du.ip), du.ip->i_disk_size);

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_create_child(tp, resblks, &du);
	if (error)
		goto out_trans_cancel;

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp);
	if (error)
		goto out_release_inode;

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	*ipp = du.ip;
	xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	xfs_parent_finish(mp, du.ppargs);
	return 0;

out_trans_cancel:
	xfs_trans_cancel(tp);
out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to finish the
	 * setup of the inode and release the inode.  This prevents recursive
	 * transactions and deadlocks from xfs_inactive.
	 */
	if (du.ip) {
		xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
		xfs_finish_inode_setup(du.ip);
		xfs_irele(du.ip);
	}
out_parent:
	xfs_parent_finish(mp, du.ppargs);
out_release_dquots:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}
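/*
 * Editor's sketch (not part of the original file): xfs_symlink() above uses
 * the standard kernel "goto ladder" so that each exit path releases exactly
 * the resources acquired so far, in reverse order of acquisition.  A
 * stripped-down template of the same shape, with hypothetical ex_take_a()
 * style helpers:
 */
int ex_take_a(void);
int ex_take_b(void);
int ex_take_c(void);
void ex_drop_a(void);
void ex_drop_b(void);

static int ex_do_op(void)
{
	int error;

	error = ex_take_a();		/* hypothetical resource A */
	if (error)
		return error;

	error = ex_take_b();		/* hypothetical resource B */
	if (error)
		goto out_drop_a;

	error = ex_take_c();		/* hypothetical resource C */
	if (error)
		goto out_drop_b;

	return 0;			/* success: caller now owns A, B, C */

out_drop_b:
	ex_drop_b();
out_drop_a:
	ex_drop_a();
	return error;
}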
/*
 * Free a symlink that has blocks associated with it.
 *
 * Note: zero length symlinks are not allowed to exist. When we set the size to
 * zero, also change it to a regular file so that it does not get written to
 * disk as a zero length symlink. The inode is on the unlinked list already, so
 * userspace cannot find this inode anymore, so this change is not user visible
 * but allows us to catch corrupt zero-length symlinks in the verifiers.
 */
STATIC int
xfs_inactive_symlink_rmt(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	ASSERT(!xfs_need_iread_extents(&ip->i_df));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it. Free the
	 * blocks here. We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_df.if_nextents > 0 && ip->i_df.if_nextents <= 2);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Lock the inode, fix the size, turn it into a regular file and join
	 * it to the transaction.  Hold it so in the normal path, we still
	 * have it locked for the second transaction.  In the error paths we
	 * need it held so the cancel won't rele it, see below.
	 */
	ip->i_disk_size = 0;
	VFS_I(ip)->i_mode = (VFS_I(ip)->i_mode & ~S_IFMT) | S_IFREG;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto error_trans_cancel;

	error = xfs_trans_commit(tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		goto error_unlock;
	}

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;

error_trans_cancel:
	xfs_trans_cancel(tp);
error_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * xfs_inactive_symlink - free a symlink
 */
int
xfs_inactive_symlink(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			pathlen;

	trace_xfs_inactive_symlink(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	pathlen = (int)ip->i_disk_size;
	ASSERT(pathlen);

	if (pathlen <= 0 || pathlen > XFS_SYMLINK_MAXLEN) {
		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
			  __func__, (unsigned long long)ip->i_ino, pathlen);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		ASSERT(0);
		xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
		return -EFSCORRUPTED;
	}

	/*
	 * Inline fork state gets removed by xfs_difree() so we have nothing
	 * to do here in that case.
	 */
	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return 0;
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/* remove the remote symlink */
	return xfs_inactive_symlink_rmt(ip);
}
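/*
 * Editor's sketch (not part of the original file): the xfs_symlink() and
 * xfs_readlink() paths above are what plain symlink(2)/readlink(2) calls end
 * up in on XFS.  A runnable userspace round trip, sized for the
 * XFS_SYMLINK_MAXLEN (1024 byte) limit the kernel code checks, and showing
 * that readlink(2) does not NUL-terminate:
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[1025];		/* XFS_SYMLINK_MAXLEN is 1024, plus NUL */
	ssize_t n;

	if (symlink("/some/target/path", "demo-link") && errno != EEXIST) {
		/* e.g. EPERM, or ENAMETOOLONG for an oversized target */
		perror("symlink");
		return 1;
	}
	n = readlink("demo-link", buf, sizeof(buf) - 1);
	if (n < 0) {
		perror("readlink");
		return 1;
	}
	buf[n] = '\0';		/* readlink(2) does not add the NUL */
	printf("demo-link -> %s\n", buf);
	return 0;
}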
// SPDX-License-Identifier: GPL-2.0
#include <linux/cpufreq.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

extern const struct seq_operations cpuinfo_op;

static int cpuinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &cpuinfo_op);
}

static const struct proc_ops cpuinfo_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_open	= cpuinfo_open,
	.proc_read_iter	= seq_read_iter,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
};

static int __init proc_cpuinfo_init(void)
{
	proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops);
	return 0;
}
fs_initcall(proc_cpuinfo_init);
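/*
 * Editor's sketch (not part of the original file): the cpuinfo entry above
 * wires an externally defined seq_operations table into procfs.  For a
 * one-record file the single_open() convenience wrapper is the usual
 * pattern; below is a minimal, self-contained module doing that for a
 * hypothetical "ex_hello" entry.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int ex_hello_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from procfs\n");
	return 0;
}

static int ex_hello_open(struct inode *inode, struct file *file)
{
	return single_open(file, ex_hello_show, NULL);
}

static const struct proc_ops ex_hello_proc_ops = {
	.proc_open	= ex_hello_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

static struct proc_dir_entry *ex_hello_pde;

static int __init ex_hello_init(void)
{
	ex_hello_pde = proc_create("ex_hello", 0444, NULL,
				   &ex_hello_proc_ops);
	return ex_hello_pde ? 0 : -ENOMEM;
}

static void __exit ex_hello_exit(void)
{
	proc_remove(ex_hello_pde);
}

module_init(ex_hello_init);
module_exit(ex_hello_exit);
MODULE_LICENSE("GPL");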
// SPDX-License-Identifier: GPL-2.0+
/*
 * inode.c -- user mode filesystem api for usb gadget controllers
 *
 * Copyright (C) 2003-2004 David Brownell
 * Copyright (C) 2003 Agilent Technologies
 */

/* #define VERBOSE_DEBUG */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/uts.h>
#include <linux/wait.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string_choices.h>
#include <linux/poll.h>
#include <linux/kthread.h>
#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/refcount.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/moduleparam.h>

#include <linux/usb/gadgetfs.h>
#include <linux/usb/gadget.h>
#include <linux/usb/composite.h> /* for USB_GADGET_DELAYED_STATUS */

/* Undef helpers from linux/usb/composite.h as gadgetfs redefines them */
#undef DBG
#undef ERROR
#undef INFO

/*
 * The gadgetfs API maps each
endpoint to a file descriptor so that you * can use standard synchronous read/write calls for I/O. There's some * O_NONBLOCK and O_ASYNC/FASYNC style i/o support. Example usermode * drivers show how this works in practice. You can also use AIO to * eliminate I/O gaps between requests, to help when streaming data. * * Key parts that must be USB-specific are protocols defining how the * read/write operations relate to the hardware state machines. There * are two types of files. One type is for the device, implementing ep0. * The other type is for each IN or OUT endpoint. In both cases, the * user mode driver must configure the hardware before using it. * * - First, dev_config() is called when /dev/gadget/$CHIP is configured * (by writing configuration and device descriptors). Afterwards it * may serve as a source of device events, used to handle all control * requests other than basic enumeration. * * - Then, after a SET_CONFIGURATION control request, ep_config() is * called when each /dev/gadget/ep* file is configured (by writing * endpoint descriptors). Afterwards these files are used to write() * IN data or to read() OUT data. To halt the endpoint, a "wrong * direction" request is issued (like reading an IN endpoint). * * Unlike "usbfs" the only ioctl()s are for things that are rare, and maybe * not possible on all hardware. For example, precise fault handling with * respect to data left in endpoint fifos after aborted operations; or * selective clearing of endpoint halts, to implement SET_INTERFACE. */ #define DRIVER_DESC "USB Gadget filesystem" #define DRIVER_VERSION "24 Aug 2004" static const char driver_desc [] = DRIVER_DESC; static const char shortname [] = "gadgetfs"; MODULE_DESCRIPTION (DRIVER_DESC); MODULE_AUTHOR ("David Brownell"); MODULE_LICENSE ("GPL"); static int ep_open(struct inode *, struct file *); /*----------------------------------------------------------------------*/ #define GADGETFS_MAGIC 0xaee71ee7 /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks * ep0/device i/o modes and binding to the controller. Driver * must always write descriptors to initialize the device, then * the device becomes UNCONNECTED until enumeration. */ STATE_DEV_OPENED, /* From then on, ep0 fd is in either of two basic modes: * - (UN)CONNECTED: read usb_gadgetfs_event(s) from it * - SETUP: read/write will transfer control data and succeed; * or if "wrong direction", performs protocol stall */ STATE_DEV_UNCONNECTED, STATE_DEV_CONNECTED, STATE_DEV_SETUP, /* UNBOUND means the driver closed ep0, so the device won't be * accessible again (DEV_DISABLED) until all fds are closed. */ STATE_DEV_UNBOUND, }; /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 #define RBUF_SIZE 256 struct dev_data { spinlock_t lock; refcount_t count; int udc_usage; enum ep0_state state; /* P: lock */ struct usb_gadgetfs_event event [N_EVENT]; unsigned ev_next; struct fasync_struct *fasync; u8 current_config; /* drivers reading ep0 MUST handle control requests (SETUP) * reported that way; else the host will time out. 
*/ unsigned usermode_setup : 1, setup_in : 1, setup_can_stall : 1, setup_out_ready : 1, setup_out_error : 1, setup_abort : 1, gadget_registered : 1; unsigned setup_wLength; /* the rest is basically write-once */ struct usb_config_descriptor *config, *hs_config; struct usb_device_descriptor *dev; struct usb_request *req; struct usb_gadget *gadget; struct list_head epfiles; void *buf; wait_queue_head_t wait; struct super_block *sb; struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) { refcount_inc (&data->count); } static void put_dev (struct dev_data *data) { if (likely (!refcount_dec_and_test (&data->count))) return; /* needs no more cleanup */ BUG_ON (waitqueue_active (&data->wait)); kfree (data); } static struct dev_data *dev_new (void) { struct dev_data *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; dev->state = STATE_DEV_DISABLED; refcount_set (&dev->count, 1); spin_lock_init (&dev->lock); INIT_LIST_HEAD (&dev->epfiles); init_waitqueue_head (&dev->wait); return dev; } /*----------------------------------------------------------------------*/ /* other /dev/gadget/$ENDPOINT files represent endpoints */ enum ep_state { STATE_EP_DISABLED = 0, STATE_EP_READY, STATE_EP_ENABLED, STATE_EP_UNBOUND, }; struct ep_data { struct mutex lock; enum ep_state state; refcount_t count; struct dev_data *dev; /* must hold dev->lock before accessing ep or req */ struct usb_ep *ep; struct usb_request *req; ssize_t status; char name [16]; struct usb_endpoint_descriptor desc, hs_desc; struct list_head epfiles; wait_queue_head_t wait; struct dentry *dentry; }; static inline void get_ep (struct ep_data *data) { refcount_inc (&data->count); } static void put_ep (struct ep_data *data) { if (likely (!refcount_dec_and_test (&data->count))) return; put_dev (data->dev); /* needs no more cleanup */ BUG_ON (!list_empty (&data->epfiles)); BUG_ON (waitqueue_active (&data->wait)); kfree (data); } /*----------------------------------------------------------------------*/ /* most "how to use the hardware" policy choices are in userspace: * mapping endpoint roles (which the driver needs) to the capabilities * which the usb controller has. most of those capabilities are exposed * implicitly, starting with the driver name and then endpoint names. */ static const char *CHIP; static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ /*----------------------------------------------------------------------*/ /* NOTE: don't use dev_printk calls before binding to the gadget * at the end of ep0 configuration, or after unbind. */ /* too wordy: dev_printk(level , &(d)->gadget->dev , fmt , ## args) */ #define xprintk(d,level,fmt,args...) \ printk(level "%s: " fmt , shortname , ## args) #ifdef DEBUG #define DBG(dev,fmt,args...) \ xprintk(dev , KERN_DEBUG , fmt , ## args) #else #define DBG(dev,fmt,args...) \ do { } while (0) #endif /* DEBUG */ #ifdef VERBOSE_DEBUG #define VDEBUG DBG #else #define VDEBUG(dev,fmt,args...) \ do { } while (0) #endif /* DEBUG */ #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) /*----------------------------------------------------------------------*/ /* SYNCHRONOUS ENDPOINT OPERATIONS (bulk/intr/iso) * * After opening, configure non-control endpoints. Then use normal * stream read() and write() requests; and maybe ioctl() to get more * precise FIFO status when recovering from cancellation. 
*/ static void epio_complete (struct usb_ep *ep, struct usb_request *req) { struct ep_data *epdata = ep->driver_data; if (!req->context) return; if (req->status) epdata->status = req->status; else epdata->status = req->actual; complete ((struct completion *)req->context); } /* tasklock endpoint, returning when it's connected. * still need dev->lock to use epdata->ep. */ static int get_ready_ep (unsigned f_flags, struct ep_data *epdata, bool is_write) { int val; if (f_flags & O_NONBLOCK) { if (!mutex_trylock(&epdata->lock)) goto nonblock; if (epdata->state != STATE_EP_ENABLED && (!is_write || epdata->state != STATE_EP_READY)) { mutex_unlock(&epdata->lock); nonblock: val = -EAGAIN; } else val = 0; return val; } val = mutex_lock_interruptible(&epdata->lock); if (val < 0) return val; switch (epdata->state) { case STATE_EP_ENABLED: return 0; case STATE_EP_READY: /* not configured yet */ if (is_write) return 0; fallthrough; case STATE_EP_UNBOUND: /* clean disconnect */ break; // case STATE_EP_DISABLED: /* "can't happen" */ default: /* error! */ pr_debug ("%s: ep %p not available, state %d\n", shortname, epdata, epdata->state); } mutex_unlock(&epdata->lock); return -ENODEV; } static ssize_t ep_io (struct ep_data *epdata, void *buf, unsigned len) { DECLARE_COMPLETION_ONSTACK (done); int value; spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { struct usb_request *req = epdata->req; req->context = &done; req->complete = epio_complete; req->buf = buf; req->length = len; value = usb_ep_queue (epdata->ep, req, GFP_ATOMIC); } else value = -ENODEV; spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { value = wait_for_completion_interruptible(&done); if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { DBG (epdata->dev, "%s i/o interrupted\n", epdata->name); usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); wait_for_completion(&done); if (epdata->status == -ECONNRESET) epdata->status = -EINTR; } else { spin_unlock_irq (&epdata->dev->lock); DBG (epdata->dev, "endpoint gone\n"); wait_for_completion(&done); epdata->status = -ENODEV; } } return epdata->status; } return value; } static int ep_release (struct inode *inode, struct file *fd) { struct ep_data *data = fd->private_data; int value; value = mutex_lock_interruptible(&data->lock); if (value < 0) return value; /* clean up if this can be reopened */ if (data->state != STATE_EP_UNBOUND) { data->state = STATE_EP_DISABLED; data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; usb_ep_disable(data->ep); } mutex_unlock(&data->lock); put_ep (data); return 0; } static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) { struct ep_data *data = fd->private_data; int status; if ((status = get_ready_ep (fd->f_flags, data, false)) < 0) return status; spin_lock_irq (&data->dev->lock); if (likely (data->ep != NULL)) { switch (code) { case GADGETFS_FIFO_STATUS: status = usb_ep_fifo_status (data->ep); break; case GADGETFS_FIFO_FLUSH: usb_ep_fifo_flush (data->ep); break; case GADGETFS_CLEAR_HALT: status = usb_ep_clear_halt (data->ep); break; default: status = -ENOTTY; } } else status = -ENODEV; spin_unlock_irq (&data->dev->lock); mutex_unlock(&data->lock); return status; } /*----------------------------------------------------------------------*/ /* ASYNCHRONOUS ENDPOINT I/O OPERATIONS (bulk/intr/iso) */ struct kiocb_priv { struct usb_request *req; struct ep_data *epdata; struct kiocb *iocb; struct mm_struct *mm; struct work_struct work; void 
*buf; struct iov_iter to; const void *to_free; unsigned actual; }; static int ep_aio_cancel(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; struct ep_data *epdata; int value; local_irq_disable(); epdata = priv->epdata; // spin_lock(&epdata->dev->lock); if (likely(epdata && epdata->ep && priv->req)) value = usb_ep_dequeue (epdata->ep, priv->req); else value = -EINVAL; // spin_unlock(&epdata->dev->lock); local_irq_enable(); return value; } static void ep_user_copy_worker(struct work_struct *work) { struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work); struct mm_struct *mm = priv->mm; struct kiocb *iocb = priv->iocb; size_t ret; kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ iocb->ki_complete(iocb, ret); kfree(priv->buf); kfree(priv->to_free); kfree(priv); } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) { struct kiocb *iocb = req->context; struct kiocb_priv *priv = iocb->private; struct ep_data *epdata = priv->epdata; /* lock against disconnect (and ideally, cancel) */ spin_lock(&epdata->dev->lock); priv->req = NULL; priv->epdata = NULL; /* if this was a write or a read returning no data then we * don't need to copy anything to userspace, so we can * complete the aio request immediately. */ if (priv->to_free == NULL || unlikely(req->actual == 0)) { kfree(req->buf); kfree(priv->to_free); kfree(priv); iocb->private = NULL; iocb->ki_complete(iocb, req->actual ? req->actual : (long)req->status); } else { /* ep_copy_to_user() won't report both; we hide some faults */ if (unlikely(0 != req->status)) DBG(epdata->dev, "%s fault %d len %d\n", ep->name, req->status, req->actual); priv->buf = req->buf; priv->actual = req->actual; INIT_WORK(&priv->work, ep_user_copy_worker); schedule_work(&priv->work); } usb_ep_free_request(ep, req); spin_unlock(&epdata->dev->lock); put_ep(epdata); } static ssize_t ep_aio(struct kiocb *iocb, struct kiocb_priv *priv, struct ep_data *epdata, char *buf, size_t len) { struct usb_request *req; ssize_t value; iocb->private = priv; priv->iocb = iocb; kiocb_set_cancel_fn(iocb, ep_aio_cancel); get_ep(epdata); priv->epdata = epdata; priv->actual = 0; priv->mm = current->mm; /* mm teardown waits for iocbs in exit_aio() */ /* each kiocb is coupled to one usb_request, but we can't * allocate or submit those if the host disconnected. 
*/ spin_lock_irq(&epdata->dev->lock); value = -ENODEV; if (unlikely(epdata->ep == NULL)) goto fail; req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); value = -ENOMEM; if (unlikely(!req)) goto fail; priv->req = req; req->buf = buf; req->length = len; req->complete = ep_aio_complete; req->context = iocb; value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC); if (unlikely(0 != value)) { usb_ep_free_request(epdata->ep, req); goto fail; } spin_unlock_irq(&epdata->dev->lock); return -EIOCBQUEUED; fail: spin_unlock_irq(&epdata->dev->lock); kfree(priv->to_free); kfree(priv); put_ep(epdata); return value; } static ssize_t ep_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct ep_data *epdata = file->private_data; size_t len = iov_iter_count(to); ssize_t value; char *buf; if ((value = get_ready_ep(file->f_flags, epdata, false)) < 0) return value; /* halt any endpoint by doing a "wrong direction" i/o call */ if (usb_endpoint_dir_in(&epdata->desc)) { if (usb_endpoint_xfer_isoc(&epdata->desc) || !is_sync_kiocb(iocb)) { mutex_unlock(&epdata->lock); return -EINVAL; } DBG (epdata->dev, "%s halt\n", epdata->name); spin_lock_irq(&epdata->dev->lock); if (likely(epdata->ep != NULL)) usb_ep_set_halt(epdata->ep); spin_unlock_irq(&epdata->dev->lock); mutex_unlock(&epdata->lock); return -EBADMSG; } buf = kmalloc(len, GFP_KERNEL); if (unlikely(!buf)) { mutex_unlock(&epdata->lock); return -ENOMEM; } if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); if (value >= 0 && (copy_to_iter(buf, value, to) != value)) value = -EFAULT; } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); value = -ENOMEM; if (!priv) goto fail; priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL); if (!iter_is_ubuf(&priv->to) && !priv->to_free) { kfree(priv); goto fail; } value = ep_aio(iocb, priv, epdata, buf, len); if (value == -EIOCBQUEUED) buf = NULL; } fail: kfree(buf); mutex_unlock(&epdata->lock); return value; } static ssize_t ep_config(struct ep_data *, const char *, size_t); static ssize_t ep_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct ep_data *epdata = file->private_data; size_t len = iov_iter_count(from); bool configured; ssize_t value; char *buf; if ((value = get_ready_ep(file->f_flags, epdata, true)) < 0) return value; configured = epdata->state == STATE_EP_ENABLED; /* halt any endpoint by doing a "wrong direction" i/o call */ if (configured && !usb_endpoint_dir_in(&epdata->desc)) { if (usb_endpoint_xfer_isoc(&epdata->desc) || !is_sync_kiocb(iocb)) { mutex_unlock(&epdata->lock); return -EINVAL; } DBG (epdata->dev, "%s halt\n", epdata->name); spin_lock_irq(&epdata->dev->lock); if (likely(epdata->ep != NULL)) usb_ep_set_halt(epdata->ep); spin_unlock_irq(&epdata->dev->lock); mutex_unlock(&epdata->lock); return -EBADMSG; } buf = kmalloc(len, GFP_KERNEL); if (unlikely(!buf)) { mutex_unlock(&epdata->lock); return -ENOMEM; } if (unlikely(!copy_from_iter_full(buf, len, from))) { value = -EFAULT; goto out; } if (unlikely(!configured)) { value = ep_config(epdata, buf, len); } else if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); value = -ENOMEM; if (priv) { value = ep_aio(iocb, priv, epdata, buf, len); if (value == -EIOCBQUEUED) buf = NULL; } } out: kfree(buf); mutex_unlock(&epdata->lock); return value; } /*----------------------------------------------------------------------*/ /* used after endpoint configuration */ static const struct 
file_operations ep_io_operations = { .owner = THIS_MODULE, .open = ep_open, .release = ep_release, .unlocked_ioctl = ep_ioctl, .read_iter = ep_read_iter, .write_iter = ep_write_iter, }; /* ENDPOINT INITIALIZATION * * fd = open ("/dev/gadget/$ENDPOINT", O_RDWR) * status = write (fd, descriptors, sizeof descriptors) * * That write establishes the endpoint configuration, configuring * the controller to process bulk, interrupt, or isochronous transfers * at the right maxpacket size, and so on. * * The descriptors are message type 1, identified by a host order u32 * at the beginning of what's written. Descriptor order is: full/low * speed descriptor, then optional high speed descriptor. */ static ssize_t ep_config (struct ep_data *data, const char *buf, size_t len) { struct usb_ep *ep; u32 tag; int value, length = len; if (data->state != STATE_EP_READY) { value = -EL2HLT; goto fail; } value = len; if (len < USB_DT_ENDPOINT_SIZE + 4) goto fail0; /* we might need to change message format someday */ memcpy(&tag, buf, 4); if (tag != 1) { DBG(data->dev, "config %s, bad tag %d\n", data->name, tag); goto fail0; } buf += 4; len -= 4; /* NOTE: audio endpoint extensions not accepted here; * just don't include the extra bytes. */ /* full/low speed descriptor, then high speed */ memcpy(&data->desc, buf, USB_DT_ENDPOINT_SIZE); if (data->desc.bLength != USB_DT_ENDPOINT_SIZE || data->desc.bDescriptorType != USB_DT_ENDPOINT) goto fail0; if (len != USB_DT_ENDPOINT_SIZE) { if (len != 2 * USB_DT_ENDPOINT_SIZE) goto fail0; memcpy(&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE, USB_DT_ENDPOINT_SIZE); if (data->hs_desc.bLength != USB_DT_ENDPOINT_SIZE || data->hs_desc.bDescriptorType != USB_DT_ENDPOINT) { DBG(data->dev, "config %s, bad hs length or type\n", data->name); goto fail0; } } spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) { value = -ENOENT; goto gone; } else { ep = data->ep; if (ep == NULL) { value = -ENODEV; goto gone; } } switch (data->dev->gadget->speed) { case USB_SPEED_LOW: case USB_SPEED_FULL: ep->desc = &data->desc; break; case USB_SPEED_HIGH: /* fails if caller didn't provide that descriptor... */ ep->desc = &data->hs_desc; break; default: DBG(data->dev, "unconnected, %s init abandoned\n", data->name); value = -EINVAL; goto gone; } value = usb_ep_enable(ep); if (value == 0) { data->state = STATE_EP_ENABLED; value = length; } gone: spin_unlock_irq (&data->dev->lock); if (value < 0) { fail: data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; } return value; fail0: value = -EINVAL; goto fail; } static int ep_open (struct inode *inode, struct file *fd) { struct ep_data *data = inode->i_private; int value = -EBUSY; if (mutex_lock_interruptible(&data->lock) != 0) return -EINTR; spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) value = -ENOENT; else if (data->state == STATE_EP_DISABLED) { value = 0; data->state = STATE_EP_READY; get_ep (data); fd->private_data = data; VDEBUG (data->dev, "%s ready\n", data->name); } else DBG (data->dev, "%s state %d\n", data->name, data->state); spin_unlock_irq (&data->dev->lock); mutex_unlock(&data->lock); return value; } /*----------------------------------------------------------------------*/ /* EP0 IMPLEMENTATION can be partly in userspace. * * Drivers that use this facility receive various events, including * control requests the kernel doesn't handle. Drivers that don't * use this facility may be too simple-minded for real applications. 
*/ static inline void ep0_readable (struct dev_data *dev) { wake_up (&dev->wait); kill_fasync (&dev->fasync, SIGIO, POLL_IN); } static void clean_req (struct usb_ep *ep, struct usb_request *req) { struct dev_data *dev = ep->driver_data; if (req->buf != dev->rbuf) { kfree(req->buf); req->buf = dev->rbuf; } req->complete = epio_complete; dev->setup_out_ready = 0; } static void ep0_complete (struct usb_ep *ep, struct usb_request *req) { struct dev_data *dev = ep->driver_data; unsigned long flags; int free = 1; /* for control OUT, data must still get to userspace */ spin_lock_irqsave(&dev->lock, flags); if (!dev->setup_in) { dev->setup_out_error = (req->status != 0); if (!dev->setup_out_error) free = 0; dev->setup_out_ready = 1; ep0_readable (dev); } /* clean up as appropriate */ if (free && req->buf != &dev->rbuf) clean_req (ep, req); req->complete = epio_complete; spin_unlock_irqrestore(&dev->lock, flags); } static int setup_req (struct usb_ep *ep, struct usb_request *req, u16 len) { struct dev_data *dev = ep->driver_data; if (dev->setup_out_ready) { DBG (dev, "ep0 request busy!\n"); return -EBUSY; } if (len > sizeof (dev->rbuf)) req->buf = kmalloc(len, GFP_ATOMIC); if (req->buf == NULL) { req->buf = dev->rbuf; return -ENOMEM; } req->complete = ep0_complete; req->length = len; req->zero = 0; return 0; } static ssize_t ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t retval; enum ep0_state state; spin_lock_irq (&dev->lock); if (dev->state <= STATE_DEV_OPENED) { retval = -EINVAL; goto done; } /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; retval = -EIDRM; goto done; } /* control DATA stage */ if ((state = dev->state) == STATE_DEV_SETUP) { if (dev->setup_in) { /* stall IN */ VDEBUG(dev, "ep0in stall\n"); (void) usb_ep_set_halt (dev->gadget->ep0); retval = -EL2HLT; dev->state = STATE_DEV_CONNECTED; } else if (len == 0) { /* ack SET_CONFIGURATION etc */ struct usb_ep *ep = dev->gadget->ep0; struct usb_request *req = dev->req; if ((retval = setup_req (ep, req, 0)) == 0) { ++dev->udc_usage; spin_unlock_irq (&dev->lock); retval = usb_ep_queue (ep, req, GFP_KERNEL); spin_lock_irq (&dev->lock); --dev->udc_usage; } dev->state = STATE_DEV_CONNECTED; /* assume that was SET_CONFIGURATION */ if (dev->current_config) { unsigned power; if (gadget_is_dualspeed(dev->gadget) && (dev->gadget->speed == USB_SPEED_HIGH)) power = dev->hs_config->bMaxPower; else power = dev->config->bMaxPower; usb_gadget_vbus_draw(dev->gadget, 2 * power); } } else { /* collect OUT data */ if ((fd->f_flags & O_NONBLOCK) != 0 && !dev->setup_out_ready) { retval = -EAGAIN; goto done; } spin_unlock_irq (&dev->lock); retval = wait_event_interruptible (dev->wait, dev->setup_out_ready != 0); /* FIXME state could change from under us */ spin_lock_irq (&dev->lock); if (retval) goto done; if (dev->state != STATE_DEV_SETUP) { retval = -ECANCELED; goto done; } dev->state = STATE_DEV_CONNECTED; if (dev->setup_out_error) retval = -EIO; else { len = min (len, (size_t)dev->req->actual); ++dev->udc_usage; spin_unlock_irq(&dev->lock); if (copy_to_user (buf, dev->req->buf, len)) retval = -EFAULT; else retval = len; spin_lock_irq(&dev->lock); --dev->udc_usage; clean_req (dev->gadget->ep0, dev->req); /* NOTE userspace can't yet choose to stall */ } } goto done; } /* else normal: return event data */ if (len < sizeof dev->event [0]) { retval = -EINVAL; goto done; } len -= len % sizeof (struct usb_gadgetfs_event); dev->usermode_setup = 1; 
scan: /* return queued events right away */ if (dev->ev_next != 0) { unsigned i, n; n = len / sizeof (struct usb_gadgetfs_event); if (dev->ev_next < n) n = dev->ev_next; /* ep0 i/o has special semantics during STATE_DEV_SETUP */ for (i = 0; i < n; i++) { if (dev->event [i].type == GADGETFS_SETUP) { dev->state = STATE_DEV_SETUP; n = i + 1; break; } } spin_unlock_irq (&dev->lock); len = n * sizeof (struct usb_gadgetfs_event); if (copy_to_user (buf, &dev->event, len)) retval = -EFAULT; else retval = len; if (len > 0) { /* NOTE this doesn't guard against broken drivers; * concurrent ep0 readers may lose events. */ spin_lock_irq (&dev->lock); if (dev->ev_next > n) { memmove(&dev->event[0], &dev->event[n], sizeof (struct usb_gadgetfs_event) * (dev->ev_next - n)); } dev->ev_next -= n; spin_unlock_irq (&dev->lock); } return retval; } if (fd->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto done; } switch (state) { default: DBG (dev, "fail %s, state %d\n", __func__, state); retval = -ESRCH; break; case STATE_DEV_UNCONNECTED: case STATE_DEV_CONNECTED: spin_unlock_irq (&dev->lock); DBG (dev, "%s wait\n", __func__); /* wait for events */ retval = wait_event_interruptible (dev->wait, dev->ev_next != 0); if (retval < 0) return retval; spin_lock_irq (&dev->lock); goto scan; } done: spin_unlock_irq (&dev->lock); return retval; } static struct usb_gadgetfs_event * next_event (struct dev_data *dev, enum usb_gadgetfs_event_type type) { struct usb_gadgetfs_event *event; unsigned i; switch (type) { /* these events purge the queue */ case GADGETFS_DISCONNECT: if (dev->state == STATE_DEV_SETUP) dev->setup_abort = 1; fallthrough; case GADGETFS_CONNECT: dev->ev_next = 0; break; case GADGETFS_SETUP: /* previous request timed out */ case GADGETFS_SUSPEND: /* same effect */ /* these events can't be repeated */ for (i = 0; i != dev->ev_next; i++) { if (dev->event [i].type != type) continue; DBG(dev, "discard old event[%d] %d\n", i, type); dev->ev_next--; if (i == dev->ev_next) break; /* indices start at zero, for simplicity */ memmove (&dev->event [i], &dev->event [i + 1], sizeof (struct usb_gadgetfs_event) * (dev->ev_next - i)); } break; default: BUG (); } VDEBUG(dev, "event[%d] = %d\n", dev->ev_next, type); event = &dev->event [dev->ev_next++]; BUG_ON (dev->ev_next > N_EVENT); memset (event, 0, sizeof *event); event->type = type; return event; } static ssize_t ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t retval = -ESRCH; /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; retval = -EIDRM; /* data and/or status stage for control request */ } else if (dev->state == STATE_DEV_SETUP) { len = min_t(size_t, len, dev->setup_wLength); if (dev->setup_in) { retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { dev->state = STATE_DEV_CONNECTED; ++dev->udc_usage; spin_unlock_irq (&dev->lock); if (copy_from_user (dev->req->buf, buf, len)) retval = -EFAULT; else { if (len < dev->setup_wLength) dev->req->zero = 1; retval = usb_ep_queue ( dev->gadget->ep0, dev->req, GFP_KERNEL); } spin_lock_irq(&dev->lock); --dev->udc_usage; if (retval < 0) { clean_req (dev->gadget->ep0, dev->req); } else retval = len; return retval; } /* can stall some OUT transfers */ } else if (dev->setup_can_stall) { VDEBUG(dev, "ep0out stall\n"); (void) usb_ep_set_halt (dev->gadget->ep0); retval = -EL2HLT; dev->state = STATE_DEV_CONNECTED; } else { DBG(dev, "bogus ep0out stall!\n"); } } else DBG (dev, "fail %s, state 
%d\n", __func__, dev->state); return retval; } static int ep0_fasync (int f, struct file *fd, int on) { struct dev_data *dev = fd->private_data; // caller must F_SETOWN before signal delivery happens VDEBUG(dev, "%s %s\n", __func__, str_on_off(on)); return fasync_helper (f, fd, on, &dev->fasync); } static struct usb_gadget_driver gadgetfs_driver; static int dev_release (struct inode *inode, struct file *fd) { struct dev_data *dev = fd->private_data; /* closing ep0 === shutdown all */ if (dev->gadget_registered) { usb_gadget_unregister_driver (&gadgetfs_driver); dev->gadget_registered = false; } /* at this point "good" hardware has disconnected the * device from USB; the host won't see it any more. * alternatively, all host requests will time out. */ kfree (dev->buf); dev->buf = NULL; /* other endpoints were all decoupled from this device */ spin_lock_irq(&dev->lock); dev->state = STATE_DEV_DISABLED; spin_unlock_irq(&dev->lock); put_dev (dev); return 0; } static __poll_t ep0_poll (struct file *fd, poll_table *wait) { struct dev_data *dev = fd->private_data; __poll_t mask = 0; if (dev->state <= STATE_DEV_OPENED) return DEFAULT_POLLMASK; poll_wait(fd, &dev->wait, wait); spin_lock_irq(&dev->lock); /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; mask = EPOLLHUP; goto out; } if (dev->state == STATE_DEV_SETUP) { if (dev->setup_in || dev->setup_can_stall) mask = EPOLLOUT; } else { if (dev->ev_next != 0) mask = EPOLLIN; } out: spin_unlock_irq(&dev->lock); return mask; } static long gadget_dev_ioctl (struct file *fd, unsigned code, unsigned long value) { struct dev_data *dev = fd->private_data; struct usb_gadget *gadget = dev->gadget; long ret = -ENOTTY; spin_lock_irq(&dev->lock); if (dev->state == STATE_DEV_OPENED || dev->state == STATE_DEV_UNBOUND) { /* Not bound to a UDC */ } else if (gadget->ops->ioctl) { ++dev->udc_usage; spin_unlock_irq(&dev->lock); ret = gadget->ops->ioctl (gadget, code, value); spin_lock_irq(&dev->lock); --dev->udc_usage; } spin_unlock_irq(&dev->lock); return ret; } /*----------------------------------------------------------------------*/ /* The in-kernel gadget driver handles most ep0 issues, in particular * enumerating the single configuration (as provided from user space). * * Unrecognized ep0 requests may be handled in user space. */ static void make_qualifier (struct dev_data *dev) { struct usb_qualifier_descriptor qual; struct usb_device_descriptor *desc; qual.bLength = sizeof qual; qual.bDescriptorType = USB_DT_DEVICE_QUALIFIER; qual.bcdUSB = cpu_to_le16 (0x0200); desc = dev->dev; qual.bDeviceClass = desc->bDeviceClass; qual.bDeviceSubClass = desc->bDeviceSubClass; qual.bDeviceProtocol = desc->bDeviceProtocol; /* assumes ep0 uses the same value for both speeds ... 
*/ qual.bMaxPacketSize0 = dev->gadget->ep0->maxpacket; qual.bNumConfigurations = 1; qual.bRESERVED = 0; memcpy (dev->rbuf, &qual, sizeof qual); } static int config_buf (struct dev_data *dev, u8 type, unsigned index) { int len; int hs = 0; /* only one configuration */ if (index > 0) return -EINVAL; if (gadget_is_dualspeed(dev->gadget)) { hs = (dev->gadget->speed == USB_SPEED_HIGH); if (type == USB_DT_OTHER_SPEED_CONFIG) hs = !hs; } if (hs) { dev->req->buf = dev->hs_config; len = le16_to_cpu(dev->hs_config->wTotalLength); } else { dev->req->buf = dev->config; len = le16_to_cpu(dev->config->wTotalLength); } ((u8 *)dev->req->buf) [1] = type; return len; } static int gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) { struct dev_data *dev = get_gadget_data (gadget); struct usb_request *req = dev->req; int value = -EOPNOTSUPP; struct usb_gadgetfs_event *event; u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); if (w_length > RBUF_SIZE) { if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(RBUF_SIZE); w_length = RBUF_SIZE; } else { return value; } } spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { if (gadget_is_dualspeed(gadget) && gadget->speed == USB_SPEED_HIGH && dev->hs_config == NULL) { spin_unlock(&dev->lock); ERROR (dev, "no high speed config??\n"); return -EINVAL; } dev->state = STATE_DEV_CONNECTED; INFO (dev, "connected\n"); event = next_event (dev, GADGETFS_CONNECT); event->u.speed = gadget->speed; ep0_readable (dev); /* host may have given up waiting for response. we can miss control * requests handled lower down (device/endpoint status and features); * then ep0_{read,write} will report the wrong status. controller * driver will have aborted pending i/o. */ } else if (dev->state == STATE_DEV_SETUP) dev->setup_abort = 1; req->buf = dev->rbuf; req->context = NULL; switch (ctrl->bRequest) { case USB_REQ_GET_DESCRIPTOR: if (ctrl->bRequestType != USB_DIR_IN) goto unrecognized; switch (w_value >> 8) { case USB_DT_DEVICE: value = min (w_length, (u16) sizeof *dev->dev); dev->dev->bMaxPacketSize0 = dev->gadget->ep0->maxpacket; req->buf = dev->dev; break; case USB_DT_DEVICE_QUALIFIER: if (!dev->hs_config) break; value = min (w_length, (u16) sizeof (struct usb_qualifier_descriptor)); make_qualifier (dev); break; case USB_DT_OTHER_SPEED_CONFIG: case USB_DT_CONFIG: value = config_buf (dev, w_value >> 8, w_value & 0xff); if (value >= 0) value = min (w_length, (u16) value); break; case USB_DT_STRING: goto unrecognized; default: // all others are errors break; } break; /* currently one config, two speeds */ case USB_REQ_SET_CONFIGURATION: if (ctrl->bRequestType != 0) goto unrecognized; if (0 == (u8) w_value) { value = 0; dev->current_config = 0; usb_gadget_vbus_draw(gadget, 8 /* mA */ ); // user mode expected to disable endpoints } else { u8 config, power; if (gadget_is_dualspeed(gadget) && gadget->speed == USB_SPEED_HIGH) { config = dev->hs_config->bConfigurationValue; power = dev->hs_config->bMaxPower; } else { config = dev->config->bConfigurationValue; power = dev->config->bMaxPower; } if (config == (u8) w_value) { value = 0; dev->current_config = config; usb_gadget_vbus_draw(gadget, 2 * power); } } /* report SET_CONFIGURATION like any other control request, * except that usermode may not stall this. 
the next * request mustn't be allowed start until this finishes: * endpoints and threads set up, etc. * * NOTE: older PXA hardware (before PXA 255: without UDCCFR) * has bad/racey automagic that prevents synchronizing here. * even kernel mode drivers often miss them. */ if (value == 0) { INFO (dev, "configuration #%d\n", dev->current_config); usb_gadget_set_state(gadget, USB_STATE_CONFIGURED); if (dev->usermode_setup) { dev->setup_can_stall = 0; goto delegate; } } break; #ifndef CONFIG_USB_PXA25X /* PXA automagically handles this request too */ case USB_REQ_GET_CONFIGURATION: if (ctrl->bRequestType != 0x80) goto unrecognized; *(u8 *)req->buf = dev->current_config; value = min (w_length, (u16) 1); break; #endif default: unrecognized: VDEBUG (dev, "%s req%02x.%02x v%04x i%04x l%d\n", dev->usermode_setup ? "delegate" : "fail", ctrl->bRequestType, ctrl->bRequest, w_value, le16_to_cpu(ctrl->wIndex), w_length); /* if there's an ep0 reader, don't stall */ if (dev->usermode_setup) { dev->setup_can_stall = 1; delegate: dev->setup_in = (ctrl->bRequestType & USB_DIR_IN) ? 1 : 0; dev->setup_wLength = w_length; dev->setup_out_ready = 0; dev->setup_out_error = 0; /* read DATA stage for OUT right away */ if (unlikely (!dev->setup_in && w_length)) { value = setup_req (gadget->ep0, dev->req, w_length); if (value < 0) break; ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, dev->req, GFP_KERNEL); spin_lock (&dev->lock); --dev->udc_usage; if (value < 0) { clean_req (gadget->ep0, dev->req); break; } /* we can't currently stall these */ dev->setup_can_stall = 0; } /* state changes when reader collects event */ event = next_event (dev, GADGETFS_SETUP); event->u.setup = *ctrl; ep0_readable (dev); spin_unlock (&dev->lock); /* * Return USB_GADGET_DELAYED_STATUS as a workaround to * stop some UDC drivers (e.g. dwc3) from automatically * proceeding with the status stage for 0-length * transfers. * Should be removed once all UDC drivers are fixed to * always delay the status stage until a response is * queued to EP0. */ return w_length == 0 ? USB_GADGET_DELAYED_STATUS : 0; } } /* proceed with data transfer and status phases? 
*/ if (value >= 0 && dev->state != STATE_DEV_SETUP) { req->length = value; req->zero = value < w_length; ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL); spin_lock(&dev->lock); --dev->udc_usage; spin_unlock(&dev->lock); if (value < 0) { DBG (dev, "ep_queue --> %d\n", value); req->status = 0; } return value; } /* device stalls when value < 0 */ spin_unlock (&dev->lock); return value; } static void destroy_ep_files (struct dev_data *dev) { DBG (dev, "%s %d\n", __func__, dev->state); /* dev->state must prevent interference */ spin_lock_irq (&dev->lock); while (!list_empty(&dev->epfiles)) { struct ep_data *ep; struct inode *parent; struct dentry *dentry; /* break link to FS */ ep = list_first_entry (&dev->epfiles, struct ep_data, epfiles); list_del_init (&ep->epfiles); spin_unlock_irq (&dev->lock); dentry = ep->dentry; ep->dentry = NULL; parent = d_inode(dentry->d_parent); /* break link to controller */ mutex_lock(&ep->lock); if (ep->state == STATE_EP_ENABLED) (void) usb_ep_disable (ep->ep); ep->state = STATE_EP_UNBOUND; usb_ep_free_request (ep->ep, ep->req); ep->ep = NULL; mutex_unlock(&ep->lock); wake_up (&ep->wait); put_ep (ep); /* break link to dcache */ inode_lock(parent); d_delete (dentry); dput (dentry); inode_unlock(parent); spin_lock_irq (&dev->lock); } spin_unlock_irq (&dev->lock); } static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops); static int activate_ep_files (struct dev_data *dev) { struct usb_ep *ep; struct ep_data *data; gadget_for_each_ep (ep, dev->gadget) { data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto enomem0; data->state = STATE_EP_DISABLED; mutex_init(&data->lock); init_waitqueue_head (&data->wait); strscpy(data->name, ep->name); refcount_set (&data->count, 1); data->dev = dev; get_dev (dev); data->ep = ep; ep->driver_data = data; data->req = usb_ep_alloc_request (ep, GFP_KERNEL); if (!data->req) goto enomem1; data->dentry = gadgetfs_create_file (dev->sb, data->name, data, &ep_io_operations); if (!data->dentry) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); } return 0; enomem2: usb_ep_free_request (ep, data->req); enomem1: put_dev (dev); kfree (data); enomem0: DBG (dev, "%s enomem\n", __func__); destroy_ep_files (dev); return -ENOMEM; } static void gadgetfs_unbind (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); DBG (dev, "%s\n", __func__); spin_lock_irq (&dev->lock); dev->state = STATE_DEV_UNBOUND; while (dev->udc_usage > 0) { spin_unlock_irq(&dev->lock); usleep_range(1000, 2000); spin_lock_irq(&dev->lock); } spin_unlock_irq (&dev->lock); destroy_ep_files (dev); gadget->ep0->driver_data = NULL; set_gadget_data (gadget, NULL); /* we've already been disconnected ... 
no i/o is active */ if (dev->req) usb_ep_free_request (gadget->ep0, dev->req); DBG (dev, "%s done\n", __func__); put_dev (dev); } static struct dev_data *the_device; static int gadgetfs_bind(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { struct dev_data *dev = the_device; if (!dev) return -ESRCH; if (0 != strcmp (CHIP, gadget->name)) { pr_err("%s expected %s controller not %s\n", shortname, CHIP, gadget->name); return -ENODEV; } set_gadget_data (gadget, dev); dev->gadget = gadget; gadget->ep0->driver_data = dev; /* preallocate control response and buffer */ dev->req = usb_ep_alloc_request (gadget->ep0, GFP_KERNEL); if (!dev->req) goto enomem; dev->req->context = NULL; dev->req->complete = epio_complete; if (activate_ep_files (dev) < 0) goto enomem; INFO (dev, "bound to %s driver\n", gadget->name); spin_lock_irq(&dev->lock); dev->state = STATE_DEV_UNCONNECTED; spin_unlock_irq(&dev->lock); get_dev (dev); return 0; enomem: gadgetfs_unbind (gadget); return -ENOMEM; } static void gadgetfs_disconnect (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); unsigned long flags; spin_lock_irqsave (&dev->lock, flags); if (dev->state == STATE_DEV_UNCONNECTED) goto exit; dev->state = STATE_DEV_UNCONNECTED; INFO (dev, "disconnected\n"); next_event (dev, GADGETFS_DISCONNECT); ep0_readable (dev); exit: spin_unlock_irqrestore (&dev->lock, flags); } static void gadgetfs_suspend (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); unsigned long flags; INFO (dev, "suspended from state %d\n", dev->state); spin_lock_irqsave(&dev->lock, flags); switch (dev->state) { case STATE_DEV_SETUP: // VERY odd... host died?? case STATE_DEV_CONNECTED: case STATE_DEV_UNCONNECTED: next_event (dev, GADGETFS_SUSPEND); ep0_readable (dev); fallthrough; default: break; } spin_unlock_irqrestore(&dev->lock, flags); } static struct usb_gadget_driver gadgetfs_driver = { .function = (char *) driver_desc, .bind = gadgetfs_bind, .unbind = gadgetfs_unbind, .setup = gadgetfs_setup, .reset = gadgetfs_disconnect, .disconnect = gadgetfs_disconnect, .suspend = gadgetfs_suspend, .driver = { .name = shortname, }, }; /*----------------------------------------------------------------------*/ /* DEVICE INITIALIZATION * * fd = open ("/dev/gadget/$CHIP", O_RDWR) * status = write (fd, descriptors, sizeof descriptors) * * That write establishes the device configuration, so the kernel can * bind to the controller ... guaranteeing it can handle enumeration * at all necessary speeds. Descriptor order is: * * . message tag (u32, host order) ... for now, must be zero; it * would change to support features like multi-config devices * . full/low speed config ... all wTotalLength bytes (with interface, * class, altsetting, endpoint, and other descriptors) * . high speed config ... all descriptors, for high speed operation; * this one's optional except for high-speed hardware * . device descriptor * * Endpoints are not yet enabled. Drivers must wait until device * configuration and interface altsetting changes create * the need to configure (or unconfigure) them. * * After initialization, the device stays active for as long as that * $CHIP file is open. Events must then be read from that descriptor, * such as configuration notifications. 
*/ static int is_valid_config(struct usb_config_descriptor *config, unsigned int total) { return config->bDescriptorType == USB_DT_CONFIG && config->bLength == USB_DT_CONFIG_SIZE && total >= USB_DT_CONFIG_SIZE && config->bConfigurationValue != 0 && (config->bmAttributes & USB_CONFIG_ATT_ONE) != 0 && (config->bmAttributes & USB_CONFIG_ATT_WAKEUP) == 0; /* FIXME if gadget->is_otg, _must_ include an otg descriptor */ /* FIXME check lengths: walk to end */ } static ssize_t dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) { struct dev_data *dev = fd->private_data; ssize_t value, length = len; unsigned total; u32 tag; char *kbuf; spin_lock_irq(&dev->lock); if (dev->state > STATE_DEV_OPENED) { value = ep0_write(fd, buf, len, ptr); spin_unlock_irq(&dev->lock); return value; } spin_unlock_irq(&dev->lock); if ((len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) || (len > PAGE_SIZE * 4)) return -EINVAL; /* we might need to change message format someday */ if (copy_from_user (&tag, buf, 4)) return -EFAULT; if (tag != 0) return -EINVAL; buf += 4; length -= 4; kbuf = memdup_user(buf, length); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); spin_lock_irq (&dev->lock); value = -EINVAL; if (dev->buf) { spin_unlock_irq(&dev->lock); kfree(kbuf); return value; } dev->buf = kbuf; /* full or low speed config */ dev->config = (void *) kbuf; total = le16_to_cpu(dev->config->wTotalLength); if (!is_valid_config(dev->config, total) || total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; /* optional high speed config */ if (kbuf [1] == USB_DT_CONFIG) { dev->hs_config = (void *) kbuf; total = le16_to_cpu(dev->hs_config->wTotalLength); if (!is_valid_config(dev->hs_config, total) || total > length - USB_DT_DEVICE_SIZE) goto fail; kbuf += total; length -= total; } else { dev->hs_config = NULL; } /* could support multiple configs, using another encoding! */ /* device descriptor (tweaked for paranoia) */ if (length != USB_DT_DEVICE_SIZE) goto fail; dev->dev = (void *)kbuf; if (dev->dev->bLength != USB_DT_DEVICE_SIZE || dev->dev->bDescriptorType != USB_DT_DEVICE || dev->dev->bNumConfigurations != 1) goto fail; dev->dev->bcdUSB = cpu_to_le16 (0x0200); /* triggers gadgetfs_bind(); then we can enumerate. */ spin_unlock_irq (&dev->lock); if (dev->hs_config) gadgetfs_driver.max_speed = USB_SPEED_HIGH; else gadgetfs_driver.max_speed = USB_SPEED_FULL; value = usb_gadget_register_driver(&gadgetfs_driver); if (value != 0) { spin_lock_irq(&dev->lock); goto fail; } else { /* at this point "good" hardware has for the first time * let the USB the host see us. alternatively, if users * unplug/replug that will clear all the error state. * * note: everything running before here was guaranteed * to choke driver model style diagnostics. from here * on, they can work ... except in cleanup paths that * kick in after the ep0 descriptor is closed. 
*/ value = len; dev->gadget_registered = true; } return value; fail: dev->config = NULL; dev->hs_config = NULL; dev->dev = NULL; spin_unlock_irq (&dev->lock); pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev); kfree (dev->buf); dev->buf = NULL; return value; } static int gadget_dev_open (struct inode *inode, struct file *fd) { struct dev_data *dev = inode->i_private; int value = -EBUSY; spin_lock_irq(&dev->lock); if (dev->state == STATE_DEV_DISABLED) { dev->ev_next = 0; dev->state = STATE_DEV_OPENED; fd->private_data = dev; get_dev (dev); value = 0; } spin_unlock_irq(&dev->lock); return value; } static const struct file_operations ep0_operations = { .open = gadget_dev_open, .read = ep0_read, .write = dev_config, .fasync = ep0_fasync, .poll = ep0_poll, .unlocked_ioctl = gadget_dev_ioctl, .release = dev_release, }; /*----------------------------------------------------------------------*/ /* FILESYSTEM AND SUPERBLOCK OPERATIONS * * Mounting the filesystem creates a controller file, used first for * device configuration then later for event monitoring. */ /* FIXME PAM etc could set this security policy without mount options * if epfiles inherited ownership and permissons from ep0 ... */ static unsigned default_uid; static unsigned default_gid; static unsigned default_perm = S_IRUSR | S_IWUSR; module_param (default_uid, uint, 0644); module_param (default_gid, uint, 0644); module_param (default_perm, uint, 0644); static struct inode * gadgetfs_make_inode (struct super_block *sb, void *data, const struct file_operations *fops, int mode) { struct inode *inode = new_inode (sb); if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, default_uid); inode->i_gid = make_kgid(&init_user_ns, default_gid); simple_inode_init_ts(inode); inode->i_private = data; inode->i_fop = fops; } return inode; } /* creates in fs root directory, so non-renamable and non-linkable. * so inode and dentry are paired, until device reconfig. */ static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops) { struct dentry *dentry; struct inode *inode; dentry = d_alloc_name(sb->s_root, name); if (!dentry) return NULL; inode = gadgetfs_make_inode (sb, data, fops, S_IFREG | (default_perm & S_IRWXUGO)); if (!inode) { dput(dentry); return NULL; } d_add (dentry, inode); return dentry; } static const struct super_operations gadget_fs_operations = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, }; static int gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dev_data *dev; int rc; mutex_lock(&sb_mutex); if (the_device) { rc = -ESRCH; goto Done; } CHIP = usb_get_gadget_udc_name(); if (!CHIP) { rc = -ENODEV; goto Done; } /* superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = GADGETFS_MAGIC; sb->s_op = &gadget_fs_operations; sb->s_time_gran = 1; /* root inode */ inode = gadgetfs_make_inode (sb, NULL, &simple_dir_operations, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto Enomem; inode->i_op = &simple_dir_inode_operations; if (!(sb->s_root = d_make_root (inode))) goto Enomem; /* the ep0 file is named after the controller we expect; * user mode code can use it for sanity checks, like we do. 
*/ dev = dev_new (); if (!dev) goto Enomem; dev->sb = sb; dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations); if (!dev->dentry) { put_dev(dev); goto Enomem; } /* other endpoint files are available after hardware setup, * from binding to a controller. */ the_device = dev; rc = 0; goto Done; Enomem: kfree(CHIP); CHIP = NULL; rc = -ENOMEM; Done: mutex_unlock(&sb_mutex); return rc; } /* "mount -t gadgetfs path /dev/gadget" ends up here */ static int gadgetfs_get_tree(struct fs_context *fc) { return get_tree_single(fc, gadgetfs_fill_super); } static const struct fs_context_operations gadgetfs_context_ops = { .get_tree = gadgetfs_get_tree, }; static int gadgetfs_init_fs_context(struct fs_context *fc) { fc->ops = &gadgetfs_context_ops; return 0; } static void gadgetfs_kill_sb (struct super_block *sb) { mutex_lock(&sb_mutex); kill_litter_super (sb); if (the_device) { put_dev (the_device); the_device = NULL; } kfree(CHIP); CHIP = NULL; mutex_unlock(&sb_mutex); } /*----------------------------------------------------------------------*/ static struct file_system_type gadgetfs_type = { .owner = THIS_MODULE, .name = shortname, .init_fs_context = gadgetfs_init_fs_context, .kill_sb = gadgetfs_kill_sb, }; MODULE_ALIAS_FS("gadgetfs"); /*----------------------------------------------------------------------*/ static int __init gadgetfs_init (void) { int status; status = register_filesystem (&gadgetfs_type); if (status == 0) pr_info ("%s: %s, version " DRIVER_VERSION "\n", shortname, driver_desc); return status; } module_init (gadgetfs_init); static void __exit gadgetfs_cleanup (void) { pr_debug ("unregister %s\n", shortname); unregister_filesystem (&gadgetfs_type); } module_exit (gadgetfs_cleanup);
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * V4L2 controls support header.
 *
 * Copyright (C) 2010 Hans Verkuil <hverkuil@xs4all.nl>
 */

#ifndef _V4L2_CTRLS_H
#define _V4L2_CTRLS_H

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/videodev2.h>
#include <media/media-request.h>

/* forward references */
struct file;
struct poll_table_struct;
struct v4l2_ctrl;
struct v4l2_ctrl_handler;
struct v4l2_ctrl_helper;
struct v4l2_fh;
struct v4l2_fwnode_device_properties;
struct v4l2_subdev;
struct v4l2_subscribed_event;
struct video_device;

/**
 * union v4l2_ctrl_ptr - A pointer to a control value.
 * @p_s32: Pointer to a 32-bit signed value.
 * @p_s64: Pointer to a 64-bit signed value.
 * @p_u8: Pointer to an 8-bit unsigned value.
 * @p_u16: Pointer to a 16-bit unsigned value.
 * @p_u32: Pointer to a 32-bit unsigned value.
 * @p_char: Pointer to a string.
 * @p_mpeg2_sequence: Pointer to an MPEG2 sequence structure.
 * @p_mpeg2_picture: Pointer to an MPEG2 picture structure.
 * @p_mpeg2_quantisation: Pointer to an MPEG2 quantisation data structure.
 * @p_fwht_params: Pointer to an FWHT stateless parameters structure.
 * @p_h264_sps: Pointer to a struct v4l2_ctrl_h264_sps.
 * @p_h264_pps: Pointer to a struct v4l2_ctrl_h264_pps.
 * @p_h264_scaling_matrix: Pointer to a struct v4l2_ctrl_h264_scaling_matrix.
 * @p_h264_slice_params: Pointer to a struct v4l2_ctrl_h264_slice_params.
 * @p_h264_decode_params: Pointer to a struct v4l2_ctrl_h264_decode_params.
 * @p_h264_pred_weights: Pointer to a struct v4l2_ctrl_h264_pred_weights.
 * @p_vp8_frame: Pointer to a VP8 frame params structure.
 * @p_vp9_compressed_hdr_probs: Pointer to a VP9 frame compressed header probs
 *				structure.
 * @p_vp9_frame: Pointer to a VP9 frame params structure.
 * @p_hevc_sps: Pointer to an HEVC sequence parameter set structure.
 * @p_hevc_pps: Pointer to an HEVC picture parameter set structure.
 * @p_hevc_slice_params: Pointer to an HEVC slice parameters structure.
 * @p_hdr10_cll: Pointer to an HDR10 Content Light Level structure.
 * @p_hdr10_mastering: Pointer to an HDR10 Mastering Display structure.
 * @p_area: Pointer to an area.
 * @p_av1_sequence: Pointer to an AV1 sequence structure.
 * @p_av1_tile_group_entry: Pointer to an AV1 tile group entry structure.
 * @p_av1_frame: Pointer to an AV1 frame structure.
 * @p_av1_film_grain: Pointer to an AV1 film grain structure.
 * @p_rect: Pointer to a rectangle.
 * @p: Pointer to a compound value.
 * @p_const: Pointer to a constant compound value.
 */
union v4l2_ctrl_ptr {
	s32 *p_s32;
	s64 *p_s64;
	u8 *p_u8;
	u16 *p_u16;
	u32 *p_u32;
	char *p_char;
	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quantisation;
	struct v4l2_ctrl_fwht_params *p_fwht_params;
	struct v4l2_ctrl_h264_sps *p_h264_sps;
	struct v4l2_ctrl_h264_pps *p_h264_pps;
	struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix;
	struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
	struct v4l2_ctrl_h264_decode_params *p_h264_decode_params;
	struct v4l2_ctrl_h264_pred_weights *p_h264_pred_weights;
	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
	struct v4l2_ctrl_hevc_sps *p_hevc_sps;
	struct v4l2_ctrl_hevc_pps *p_hevc_pps;
	struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
	struct v4l2_ctrl_vp9_compressed_hdr *p_vp9_compressed_hdr_probs;
	struct v4l2_ctrl_vp9_frame *p_vp9_frame;
	struct v4l2_ctrl_hdr10_cll_info *p_hdr10_cll;
	struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering;
	struct v4l2_area *p_area;
	struct v4l2_ctrl_av1_sequence *p_av1_sequence;
	struct v4l2_ctrl_av1_tile_group_entry *p_av1_tile_group_entry;
	struct v4l2_ctrl_av1_frame *p_av1_frame;
	struct v4l2_ctrl_av1_film_grain *p_av1_film_grain;
	struct v4l2_rect *p_rect;
	void *p;
	const void *p_const;
};

/**
 * v4l2_ctrl_ptr_create() - Helper function to return a v4l2_ctrl_ptr from a
 * void pointer
 * @ptr: The void pointer
 */
static inline union v4l2_ctrl_ptr v4l2_ctrl_ptr_create(void *ptr)
{
	union v4l2_ctrl_ptr p = { .p = ptr };

	return p;
}

/**
 * struct v4l2_ctrl_ops - The control operations that the driver has to provide.
 *
 * @g_volatile_ctrl: Get a new value for this control. Generally only relevant
 *		for volatile (and usually read-only) controls such as a control
 *		that returns the current signal strength which changes
 *		continuously.
 *		If not set, then the currently cached value will be returned.
 * @try_ctrl:	Test whether the control's value is valid. Only relevant when
 *		the usual min/max/step checks are not sufficient.
 * @s_ctrl:	Actually set the new control value. s_ctrl is compulsory. The
 *		ctrl->handler->lock is held when these ops are called, so no
 *		one else can access controls owned by that handler.
 */
struct v4l2_ctrl_ops {
	int (*g_volatile_ctrl)(struct v4l2_ctrl *ctrl);
	int (*try_ctrl)(struct v4l2_ctrl *ctrl);
	int (*s_ctrl)(struct v4l2_ctrl *ctrl);
};

/**
 * struct v4l2_ctrl_type_ops - The control type operations that the driver
 *			       has to provide.
 *
 * @equal: return true if all ctrl->elems array elements are equal.
 * @init: initialize the value for array elements from from_idx to ctrl->elems.
 * @minimum: set the value to the minimum value of the control.
 * @maximum: set the value to the maximum value of the control.
 * @log: log the value.
 * @validate: validate the value for ctrl->new_elems array elements.
 *	Return 0 on success and a negative value otherwise.
 */
struct v4l2_ctrl_type_ops {
	bool (*equal)(const struct v4l2_ctrl *ctrl,
		      union v4l2_ctrl_ptr ptr1, union v4l2_ctrl_ptr ptr2);
	void (*init)(const struct v4l2_ctrl *ctrl, u32 from_idx,
		     union v4l2_ctrl_ptr ptr);
	void (*minimum)(const struct v4l2_ctrl *ctrl, u32 idx,
			union v4l2_ctrl_ptr ptr);
	void (*maximum)(const struct v4l2_ctrl *ctrl, u32 idx,
			union v4l2_ctrl_ptr ptr);
	void (*log)(const struct v4l2_ctrl *ctrl);
	int (*validate)(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr ptr);
};
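/*
 * Example: a minimal s_ctrl implementation. This is an illustrative sketch,
 * not part of this header; "foo_state", its "hdl" member, "foo_write_reg"
 * and the register names are hypothetical. Note that ctrl->handler->lock is
 * already held when s_ctrl is called:
 *
 *	static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
 *	{
 *		struct foo_state *state =
 *			container_of(ctrl->handler, struct foo_state, hdl);
 *
 *		switch (ctrl->id) {
 *		case V4L2_CID_BRIGHTNESS:
 *			return foo_write_reg(state, FOO_REG_BRI, ctrl->val);
 *		case V4L2_CID_CONTRAST:
 *			return foo_write_reg(state, FOO_REG_CON, ctrl->val);
 *		default:
 *			return -EINVAL;
 *		}
 *	}
 *
 *	static const struct v4l2_ctrl_ops foo_ctrl_ops = {
 *		.s_ctrl = foo_s_ctrl,
 *	};
 */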
/**
 * typedef v4l2_ctrl_notify_fnc - typedef for a notify argument with a function
 *	that should be called when a control value has changed.
 *
 * @ctrl: pointer to struct &v4l2_ctrl
 * @priv: control private data
 *
 * This typedef definition is used as an argument to v4l2_ctrl_notify()
 * and as an argument at struct &v4l2_ctrl_handler.
 */
typedef void (*v4l2_ctrl_notify_fnc)(struct v4l2_ctrl *ctrl, void *priv);

/**
 * struct v4l2_ctrl - The control structure.
 *
 * @node:	The list node.
 * @ev_subs:	The list of control event subscriptions.
 * @handler:	The handler that owns the control.
 * @cluster:	Point to start of cluster array.
 * @ncontrols:	Number of controls in cluster array.
 * @done:	Internal flag: set for each processed control.
 * @is_new:	Set when the user specified a new value for this control. It
 *		is also set when called from v4l2_ctrl_handler_setup(). Drivers
 *		should never set this flag.
 * @has_changed: Set when the current value differs from the new value. Drivers
 *		should never use this flag.
 * @is_private: If set, then this control is private to its handler and it
 *		will not be added to any other handlers. Drivers can set
 *		this flag.
 * @is_auto:	If set, then this control selects whether the other cluster
 *		members are in 'automatic' mode or 'manual' mode. This is
 *		used for autogain/gain type clusters. Drivers should never
 *		set this flag directly.
 * @is_int:	If set, then this control has a simple integer value (i.e. it
 *		uses ctrl->val).
 * @is_string:	If set, then this control has type %V4L2_CTRL_TYPE_STRING.
 * @is_ptr:	If set, then this control is an array and/or has type >=
 *		%V4L2_CTRL_COMPOUND_TYPES
 *		and/or has type %V4L2_CTRL_TYPE_STRING. In other words, &struct
 *		v4l2_ext_control uses field p to point to the data.
 * @is_array:	If set, then this control contains an N-dimensional array.
 * @is_dyn_array: If set, then this control contains a dynamically sized
 *		1-dimensional array. If this is set, then @is_array is also set.
 * @has_volatiles: If set, then one or more members of the cluster are volatile.
 *		Drivers should never touch this flag.
 * @call_notify: If set, then call the handler's notify function whenever the
 *		control's value changes.
 * @manual_mode_value: If the is_auto flag is set, then this is the value
 *		of the auto control that determines if that control is in
 *		manual mode. So if the value of the auto control equals this
 *		value, then the whole cluster is in manual mode. Drivers should
 *		never set this flag directly.
 * @ops:	The control ops.
 * @type_ops:	The control type ops.
 * @id:	The control ID.
 * @name:	The control name.
 * @type:	The control type.
 * @minimum:	The control's minimum value.
 * @maximum:	The control's maximum value.
 * @default_value: The control's default value.
 * @step:	The control's step value for non-menu controls.
 * @elems:	The number of elements in the N-dimensional array.
 * @elem_size:	The size in bytes of the control.
 * @new_elems:	The number of elements in p_new. This is the same as @elems,
 *		except for dynamic arrays. In that case it is in the range of
 *		1 to @p_array_alloc_elems.
 * @dims:	The size of each dimension.
 * @nr_of_dims: The number of dimensions in @dims.
 * @menu_skip_mask: The control's skip mask for menu controls. This makes it
 *		easy to skip menu items that are not valid. If bit X is set,
 *		then menu item X is skipped. Of course, this only works for
 *		menus with <= 64 menu items. There are no menus that come
 *		close to that number, so this is OK. Should we ever need more,
 *		then this will have to be extended to a bit array.
 * @qmenu:	A const char * array for all menu items.
 *		Array entries that are empty strings ("") correspond to
 *		non-existing menu items (this is in addition to the
 *		menu_skip_mask above). The last entry must be NULL.
 *		Used only if the @type is %V4L2_CTRL_TYPE_MENU.
 * @qmenu_int:	A 64-bit integer array with the integer menu items.
 *		The size of array must be equal to the menu size, e.g.:
 *		:math:`ceil(\frac{maximum - minimum}{step}) + 1`.
 *		Used only if the @type is %V4L2_CTRL_TYPE_INTEGER_MENU.
 * @flags:	The control's flags.
 * @priv:	The control's private pointer. For use by the driver. It is
 *		untouched by the control framework. Note that this pointer is
 *		not freed when the control is deleted. Should this be needed
 *		then a new internal bitfield can be added to tell the framework
 *		to free this pointer.
 * @p_array:	Pointer to the allocated array. Only valid if @is_array is true.
 * @p_array_alloc_elems: The number of elements in the allocated
 *		array for both the cur and new values. So @p_array is actually
 *		sized for 2 * @p_array_alloc_elems * @elem_size. Only valid if
 *		@is_array is true.
 * @cur:	Structure to store the current value.
 * @cur.val:	The control's current value, if the @type is represented via
 *		a u32 integer (see &enum v4l2_ctrl_type).
 * @val:	The control's new s32 value.
 * @p_def:	The control's default value represented via a union which
 *		provides a standard way of accessing control types
 *		through a pointer (for compound controls only).
 * @p_min:	The control's minimum value represented via a union which
 *		provides a standard way of accessing control types
 *		through a pointer (for compound controls only).
 * @p_max:	The control's maximum value represented via a union which
 *		provides a standard way of accessing control types
 *		through a pointer (for compound controls only).
 * @p_cur:	The control's current value represented via a union which
 *		provides a standard way of accessing control types
 *		through a pointer.
 * @p_new:	The control's new value represented via a union which provides
 *		a standard way of accessing control types
 *		through a pointer.
 */
struct v4l2_ctrl {
	/* Administrative fields */
	struct list_head node;
	struct list_head ev_subs;
	struct v4l2_ctrl_handler *handler;
	struct v4l2_ctrl **cluster;
	unsigned int ncontrols;

	unsigned int done:1;

	unsigned int is_new:1;
	unsigned int has_changed:1;
	unsigned int is_private:1;
	unsigned int is_auto:1;
	unsigned int is_int:1;
	unsigned int is_string:1;
	unsigned int is_ptr:1;
	unsigned int is_array:1;
	unsigned int is_dyn_array:1;
	unsigned int has_volatiles:1;
	unsigned int call_notify:1;
	unsigned int manual_mode_value:8;

	const struct v4l2_ctrl_ops *ops;
	const struct v4l2_ctrl_type_ops *type_ops;
	u32 id;
	const char *name;
	enum v4l2_ctrl_type type;
	s64 minimum, maximum, default_value;
	u32 elems;
	u32 elem_size;
	u32 new_elems;
	u32 dims[V4L2_CTRL_MAX_DIMS];
	u32 nr_of_dims;
	union {
		u64 step;
		u64 menu_skip_mask;
	};
	union {
		const char * const *qmenu;
		const s64 *qmenu_int;
	};
	unsigned long flags;
	void *priv;
	void *p_array;
	u32 p_array_alloc_elems;
	s32 val;
	struct {
		s32 val;
	} cur;

	union v4l2_ctrl_ptr p_def;
	union v4l2_ctrl_ptr p_min;
	union v4l2_ctrl_ptr p_max;
	union v4l2_ctrl_ptr p_new;
	union v4l2_ctrl_ptr p_cur;
};

/**
 * struct v4l2_ctrl_ref - The control reference.
 *
 * @node:	List node for the sorted list.
 * @next:	Single-link list node for the hash.
 * @ctrl:	The actual control information.
 * @helper:	Pointer to helper struct. Used internally in
 *		``prepare_ext_ctrls`` function at ``v4l2-ctrl.c``.
 * @from_other_dev: If true, then @ctrl was defined in another
 *		device than the &struct v4l2_ctrl_handler.
 * @req_done:	Internal flag: if the control handler containing this control
 *		reference is bound to a media request, then this is set when
 *		the control has been applied. This prevents applying controls
 *		from a cluster with multiple controls twice (when the first
 *		control of a cluster is applied, they all are).
 * @p_req_valid: If set, then p_req contains the control value for the request.
 * @p_req_array_enomem: If set, then p_req is invalid since allocating space for
 *		an array failed. Attempting to read this value shall
 *		result in ENOMEM. Only valid if ctrl->is_array is true.
 * @p_req_array_alloc_elems: The number of elements allocated for the
 *		array. Only valid if @p_req_valid and ctrl->is_array are
 *		true.
 * @p_req_elems: The number of elements in @p_req. This is the same as
 *		ctrl->elems, except for dynamic arrays. In that case it is in
 *		the range of 1 to @p_req_array_alloc_elems. Only valid if
 *		@p_req_valid is true.
 * @p_req:	If the control handler containing this control reference
 *		is bound to a media request, then this points to the
 *		value of the control that must be applied when the request
 *		is executed, or to the value of the control at the time
 *		that the request was completed. If @p_req_valid is false,
 *		then this control was never set for this request and the
 *		control will not be updated when this request is applied.
 *
 * Each control handler has a list of these refs. The list_head is used to
 * keep a sorted-by-control-ID list of all controls, while the next pointer
 * is used to link the control in the hash's bucket.
 */
struct v4l2_ctrl_ref {
	struct list_head node;
	struct v4l2_ctrl_ref *next;
	struct v4l2_ctrl *ctrl;
	struct v4l2_ctrl_helper *helper;
	bool from_other_dev;
	bool req_done;
	bool p_req_valid;
	bool p_req_array_enomem;
	u32 p_req_array_alloc_elems;
	u32 p_req_elems;
	union v4l2_ctrl_ptr p_req;
};

/**
 * struct v4l2_ctrl_handler - The control handler keeps track of all the
 *	controls: both the controls owned by the handler and those inherited
 *	from other handlers.
 *
 * @_lock:	Default for "lock".
 * @lock:	Lock to control access to this handler and its controls.
 *		May be replaced by the user right after init.
 * @ctrls:	The list of controls owned by this handler.
 * @ctrl_refs:	The list of control references.
 * @cached:	The last found control reference. It is common that the same
 *		control is needed multiple times, so this is a simple
 *		optimization.
 * @buckets:	Buckets for the hashing. Allows for quick control lookup.
 * @notify:	A notify callback that is called whenever the control changes
 *		value.
 *		Note that the handler's lock is held when the notify function
 *		is called!
 * @notify_priv: Passed as argument to the v4l2_ctrl notify callback.
 * @nr_of_buckets: Total number of buckets in the array.
 * @error:	The error code of the first failed control addition.
 * @request_is_queued: True if the request was queued.
 * @requests:	List to keep track of open control handler request objects.
 *		For the parent control handler (@req_obj.ops == NULL) this
 *		is the list header. When the parent control handler is
 *		removed, it has to unbind and put all these requests since
 *		they refer to the parent.
 * @requests_queued: List of the queued requests. This determines the order
 *		in which these controls are applied. Once the request is
 *		completed it is removed from this list.
 * @req_obj:	The &struct media_request_object, used to link into a
 *		&struct media_request.
 *		This request object has a refcount.
 */
struct v4l2_ctrl_handler {
	struct mutex _lock;
	struct mutex *lock;
	struct list_head ctrls;
	struct list_head ctrl_refs;
	struct v4l2_ctrl_ref *cached;
	struct v4l2_ctrl_ref **buckets;
	v4l2_ctrl_notify_fnc notify;
	void *notify_priv;
	u16 nr_of_buckets;
	int error;
	bool request_is_queued;
	struct list_head requests;
	struct list_head requests_queued;
	struct media_request_object req_obj;
};

/**
 * struct v4l2_ctrl_config - Control configuration structure.
 *
 * @ops:	The control ops.
 * @type_ops:	The control type ops. Only needed for compound controls.
 * @id:	The control ID.
 * @name:	The control name.
 * @type:	The control type.
 * @min:	The control's minimum value.
 * @max:	The control's maximum value.
 * @step:	The control's step value for non-menu controls.
 * @def:	The control's default value.
 * @p_def:	The control's default value for compound controls.
 * @p_min:	The control's minimum value for compound controls.
 * @p_max:	The control's maximum value for compound controls.
 * @dims:	The size of each dimension.
 * @elem_size:	The size in bytes of the control.
 * @flags:	The control's flags.
 * @menu_skip_mask: The control's skip mask for menu controls. This makes it
 *		easy to skip menu items that are not valid. If bit X is set,
 *		then menu item X is skipped. Of course, this only works for
 *		menus with <= 64 menu items. There are no menus that come
 *		close to that number, so this is OK. Should we ever need more,
 *		then this will have to be extended to a bit array.
 * @qmenu:	A const char * array for all menu items. Array entries that are
 *		empty strings ("") correspond to non-existing menu items (this
 *		is in addition to the menu_skip_mask above). The last entry
 *		must be NULL.
 * @qmenu_int:	A const s64 integer array for all menu items of the type
 *		V4L2_CTRL_TYPE_INTEGER_MENU.
 * @is_private: If set, then this control is private to its handler and it
 *		will not be added to any other handlers.
 */
struct v4l2_ctrl_config {
	const struct v4l2_ctrl_ops *ops;
	const struct v4l2_ctrl_type_ops *type_ops;
	u32 id;
	const char *name;
	enum v4l2_ctrl_type type;
	s64 min;
	s64 max;
	u64 step;
	s64 def;
	union v4l2_ctrl_ptr p_def;
	union v4l2_ctrl_ptr p_min;
	union v4l2_ctrl_ptr p_max;
	u32 dims[V4L2_CTRL_MAX_DIMS];
	u32 elem_size;
	u32 flags;
	u64 menu_skip_mask;
	const char * const *qmenu;
	const s64 *qmenu_int;
	unsigned int is_private:1;
};
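/*
 * Example: filling a &struct v4l2_ctrl_config for a driver-private control
 * and registering it with v4l2_ctrl_new_custom() (declared below). This is
 * an illustrative sketch; the control ID offset, name, ops struct and range
 * are hypothetical:
 *
 *	static const struct v4l2_ctrl_config foo_cfg = {
 *		.ops	= &foo_ctrl_ops,
 *		.id	= V4L2_CID_USER_BASE + 0x1000,	// hypothetical ID
 *		.name	= "Foo Filter Strength",
 *		.type	= V4L2_CTRL_TYPE_INTEGER,
 *		.min	= 0,
 *		.max	= 15,
 *		.step	= 1,
 *		.def	= 8,
 *	};
 *
 *	ctrl = v4l2_ctrl_new_custom(&state->hdl, &foo_cfg, NULL);
 */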
/**
 * v4l2_ctrl_fill - Fill in the control fields based on the control ID.
 *
 * @id: ID of the control
 * @name: pointer to be filled with a string with the name of the control
 * @type: pointer for storing the type of the control
 * @min: pointer for storing the minimum value for the control
 * @max: pointer for storing the maximum value for the control
 * @step: pointer for storing the control step
 * @def: pointer for storing the default value for the control
 * @flags: pointer for storing the flags to be used on the control
 *
 * This works for all standard V4L2 controls.
 * For non-standard controls it will only fill in the given arguments
 * and @name content will be set to %NULL.
 *
 * This function will overwrite the contents of @name, @type and @flags.
 * The contents of @min, @max, @step and @def may be modified depending on
 * the type.
 *
 * .. note::
 *
 *    Do not use in drivers! It is used internally for backwards compatibility
 *    control handling only. Once all drivers are converted to use the new
 *    control framework this function will no longer be exported.
 */
void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
		    s64 *min, s64 *max, u64 *step, s64 *def, u32 *flags);

/**
 * v4l2_ctrl_handler_init_class() - Initialize the control handler.
 * @hdl:	The control handler.
 * @nr_of_controls_hint: A hint of how many controls this handler is
 *		expected to refer to. This is the total number, so including
 *		any inherited controls. It doesn't have to be precise, but if
 *		it is way off, then you either waste memory (too many buckets
 *		are allocated) or the control lookup becomes slower (not enough
 *		buckets are allocated, so there are more slow list lookups).
 *		It will always work, though.
 * @key:	Used by the lock validator if CONFIG_LOCKDEP is set.
 * @name:	Used by the lock validator if CONFIG_LOCKDEP is set.
 *
 * .. attention::
 *
 *    Never use this call directly, always use the v4l2_ctrl_handler_init()
 *    macro that hides the @key and @name arguments.
 *
 * Return: returns an error if the buckets could not be allocated. This
 * error will also be stored in @hdl->error.
 */
int v4l2_ctrl_handler_init_class(struct v4l2_ctrl_handler *hdl,
				 unsigned int nr_of_controls_hint,
				 struct lock_class_key *key, const char *name);

#ifdef CONFIG_LOCKDEP

/**
 * v4l2_ctrl_handler_init - helper function to create a static struct
 *	&lock_class_key and calls v4l2_ctrl_handler_init_class()
 *
 * @hdl:	The control handler.
 * @nr_of_controls_hint: A hint of how many controls this handler is
 *		expected to refer to. This is the total number, so including
 *		any inherited controls. It doesn't have to be precise, but if
 *		it is way off, then you either waste memory (too many buckets
 *		are allocated) or the control lookup becomes slower (not enough
 *		buckets are allocated, so there are more slow list lookups).
 *		It will always work, though.
 *
 * This helper function creates a static struct &lock_class_key and
 * calls v4l2_ctrl_handler_init_class(), providing a proper name for the lock
 * validator.
 *
 * Use this helper function to initialize a control handler.
 */
#define v4l2_ctrl_handler_init(hdl, nr_of_controls_hint)		\
(									\
	({								\
		static struct lock_class_key _key;			\
		v4l2_ctrl_handler_init_class(hdl, nr_of_controls_hint,	\
					&_key,				\
					KBUILD_BASENAME ":"		\
					__stringify(__LINE__) ":"	\
					"(" #hdl ")->_lock");		\
	})								\
)
#else
#define v4l2_ctrl_handler_init(hdl, nr_of_controls_hint)	\
	v4l2_ctrl_handler_init_class(hdl, nr_of_controls_hint, NULL, NULL)
#endif

/**
 * v4l2_ctrl_handler_free() - Free all controls owned by the handler and free
 * the control list.
 * @hdl:	The control handler.
 *
 * Does nothing if @hdl == NULL.
 */
void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl);

/**
 * v4l2_ctrl_lock() - Helper function to lock the handler
 * associated with the control.
 * @ctrl:	The control to lock.
 */
static inline void v4l2_ctrl_lock(struct v4l2_ctrl *ctrl)
{
	mutex_lock(ctrl->handler->lock);
}

/**
 * v4l2_ctrl_unlock() - Helper function to unlock the handler
 * associated with the control.
 * @ctrl:	The control to unlock.
 */
static inline void v4l2_ctrl_unlock(struct v4l2_ctrl *ctrl)
{
	mutex_unlock(ctrl->handler->lock);
}
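/*
 * Example: typical control handler lifetime in a driver's probe path. This
 * is an illustrative sketch; "foo_state" and its "hdl" member are
 * assumptions. Controls are added with the v4l2_ctrl_new_*() functions
 * declared below; the first failure is latched in hdl->error, so the error
 * only needs to be checked once at the end:
 *
 *	struct foo_state *state = ...;
 *
 *	v4l2_ctrl_handler_init(&state->hdl, 4);
 *	// ... v4l2_ctrl_new_std() calls go here ...
 *	if (state->hdl.error) {
 *		int err = state->hdl.error;
 *
 *		v4l2_ctrl_handler_free(&state->hdl);
 *		return err;
 *	}
 */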
/**
 * __v4l2_ctrl_handler_setup() - Call the s_ctrl op for all controls belonging
 * to the handler to initialize the hardware to the current control values. The
 * caller is responsible for acquiring the control handler mutex on behalf of
 * __v4l2_ctrl_handler_setup().
 * @hdl:	The control handler.
 *
 * Button controls will be skipped, as are read-only controls.
 *
 * If @hdl == NULL, then this just returns 0.
 */
int __v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl);

/**
 * v4l2_ctrl_handler_setup() - Call the s_ctrl op for all controls belonging
 * to the handler to initialize the hardware to the current control values.
 * @hdl:	The control handler.
 *
 * Button controls will be skipped, as are read-only controls.
 *
 * If @hdl == NULL, then this just returns 0.
 */
int v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl);

/**
 * v4l2_ctrl_handler_log_status() - Log all controls owned by the handler.
 * @hdl:	The control handler.
 * @prefix:	The prefix to use when logging the control values. If the
 *		prefix does not end with a space, then ": " will be added
 *		after the prefix. If @prefix == NULL, then no prefix will be
 *		used.
 *
 * For use with VIDIOC_LOG_STATUS.
 *
 * Does nothing if @hdl == NULL.
 */
void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl,
				  const char *prefix);

/**
 * v4l2_ctrl_new_custom() - Allocate and initialize a new custom V4L2
 * control.
 *
 * @hdl:	The control handler.
 * @cfg:	The control's configuration data.
 * @priv:	The control's driver-specific private data.
 *
 * If the &v4l2_ctrl struct could not be allocated then NULL is returned
 * and @hdl->error is set to the error code (if it wasn't set already).
 */
struct v4l2_ctrl *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl,
				       const struct v4l2_ctrl_config *cfg,
				       void *priv);

/**
 * v4l2_ctrl_new_std() - Allocate and initialize a new standard V4L2 non-menu
 * control.
 *
 * @hdl:	The control handler.
 * @ops:	The control ops.
 * @id:	The control ID.
 * @min:	The control's minimum value.
 * @max:	The control's maximum value.
 * @step:	The control's step value.
 * @def:	The control's default value.
 *
 * If the &v4l2_ctrl struct could not be allocated, or the control
 * ID is not known, then NULL is returned and @hdl->error is set to the
 * appropriate error code (if it wasn't set already).
 *
 * If @id refers to a menu control, then this function will return NULL.
 *
 * Use v4l2_ctrl_new_std_menu() when adding menu controls.
 */
struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl,
				    const struct v4l2_ctrl_ops *ops,
				    u32 id, s64 min, s64 max, u64 step,
				    s64 def);

/**
 * v4l2_ctrl_new_std_menu() - Allocate and initialize a new standard V4L2
 * menu control.
 *
 * @hdl:	The control handler.
 * @ops:	The control ops.
 * @id:	The control ID.
 * @max:	The control's maximum value.
 * @mask:	The control's skip mask for menu controls. This makes it
 *		easy to skip menu items that are not valid. If bit X is set,
 *		then menu item X is skipped. Of course, this only works for
 *		menus with <= 64 menu items. There are no menus that come
 *		close to that number, so this is OK. Should we ever need more,
 *		then this will have to be extended to a bit array.
 * @def:	The control's default value.
 *
 * Same as v4l2_ctrl_new_std(), but @min is set to 0 and the @mask value
 * determines which menu items are to be skipped.
 *
 * If @id refers to a non-menu control, then this function will return NULL.
 */
struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
					 const struct v4l2_ctrl_ops *ops,
					 u32 id, u8 max, u64 mask, u8 def);
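/*
 * Example: adding a standard integer control and a standard menu control
 * (illustrative sketch; "foo_ctrl_ops" and "state" are assumptions). No
 * per-call error check is needed, see the hdl->error latching described
 * above:
 *
 *	v4l2_ctrl_new_std(&state->hdl, &foo_ctrl_ops, V4L2_CID_BRIGHTNESS,
 *			  0, 255, 1, 128);
 *	v4l2_ctrl_new_std_menu(&state->hdl, &foo_ctrl_ops,
 *			       V4L2_CID_POWER_LINE_FREQUENCY,
 *			       V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0,
 *			       V4L2_CID_POWER_LINE_FREQUENCY_50HZ);
 */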
/**
 * v4l2_ctrl_new_std_menu_items() - Create a new standard V4L2 menu control
 * with driver specific menu.
 *
 * @hdl:	The control handler.
 * @ops:	The control ops.
 * @id:	The control ID.
 * @max:	The control's maximum value.
 * @mask:	The control's skip mask for menu controls. This makes it
 *		easy to skip menu items that are not valid. If bit X is set,
 *		then menu item X is skipped. Of course, this only works for
 *		menus with <= 64 menu items. There are no menus that come
 *		close to that number, so this is OK. Should we ever need more,
 *		then this will have to be extended to a bit array.
 * @def:	The control's default value.
 * @qmenu:	The new menu.
 *
 * Same as v4l2_ctrl_new_std_menu(), but @qmenu will be the driver specific
 * menu of this control.
 *
 */
struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(struct v4l2_ctrl_handler *hdl,
					       const struct v4l2_ctrl_ops *ops,
					       u32 id, u8 max,
					       u64 mask, u8 def,
					       const char * const *qmenu);

/**
 * v4l2_ctrl_new_std_compound() - Allocate and initialize a new standard V4L2
 *	compound control.
 *
 * @hdl:	The control handler.
 * @ops:	The control ops.
 * @id:	The control ID.
 * @p_def:	The control's default value.
 * @p_min:	The control's minimum value.
 * @p_max:	The control's maximum value.
 *
 * Same as v4l2_ctrl_new_std(), but with support for compound controls.
 * To fill in the @p_def, @p_min and @p_max fields, use v4l2_ctrl_ptr_create()
 * to convert a pointer to a const union v4l2_ctrl_ptr.
 * Use v4l2_ctrl_ptr_create(NULL) if you want the default, minimum or maximum
 * value of the compound control to be all zeroes.
 * If the compound control does not set the ``V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX``
 * flag, then it does not have minimum and maximum values. In that case just use
 * v4l2_ctrl_ptr_create(NULL) for the @p_min and @p_max arguments.
 *
 */
struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl,
					     const struct v4l2_ctrl_ops *ops,
					     u32 id,
					     const union v4l2_ctrl_ptr p_def,
					     const union v4l2_ctrl_ptr p_min,
					     const union v4l2_ctrl_ptr p_max);

/**
 * v4l2_ctrl_new_int_menu() - Create a new standard V4L2 integer menu control.
 *
 * @hdl:	The control handler.
 * @ops:	The control ops.
 * @id:	The control ID.
 * @max:	The control's maximum value.
 * @def:	The control's default value.
 * @qmenu_int:	The control's menu entries.
 *
 * Same as v4l2_ctrl_new_std_menu(), but @mask is set to 0 and it additionally
 * takes as an argument an array of integers determining the menu items.
 *
 * If @id refers to a non-integer-menu control, then this function will
 * return %NULL.
 */
struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl,
					 const struct v4l2_ctrl_ops *ops,
					 u32 id, u8 max, u8 def,
					 const s64 *qmenu_int);

/**
 * typedef v4l2_ctrl_filter - Typedef to define the filter function to be
 *	used when adding a control handler.
 *
 * @ctrl: pointer to struct &v4l2_ctrl.
 */
typedef bool (*v4l2_ctrl_filter)(const struct v4l2_ctrl *ctrl);

/**
 * v4l2_ctrl_add_handler() - Add all controls from handler @add to
 *	handler @hdl.
 *
 * @hdl:	The control handler.
 * @add:	The control handler whose controls you want to add to
 *		the @hdl control handler.
 * @filter:	This function will filter which controls should be added.
 * @from_other_dev: If true, then the controls in @add were defined in another
 *		device than @hdl.
 *
 * Does nothing if either of the two handlers is a NULL pointer.
 * If @filter is NULL, then all controls are added. Otherwise only those
 * controls for which @filter returns true will be added.
 * In case of an error @hdl->error will be set to the error code (if it
 * wasn't set already).
 */
int v4l2_ctrl_add_handler(struct v4l2_ctrl_handler *hdl,
			  struct v4l2_ctrl_handler *add,
			  v4l2_ctrl_filter filter,
			  bool from_other_dev);
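/*
 * Example: inheriting a subdevice's controls into a bridge driver's handler
 * (illustrative sketch; "bridge" and "sensor_sd" are assumptions). Passing
 * a NULL filter adds all controls; a filter such as v4l2_ctrl_radio_filter()
 * below restricts the inherited set:
 *
 *	v4l2_ctrl_add_handler(&bridge->hdl, sensor_sd->ctrl_handler,
 *			      NULL, true);
 */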
/**
 * v4l2_ctrl_radio_filter() - Standard filter for radio controls.
 *
 * @ctrl:	The control that is filtered.
 *
 * This will return true for any controls that are valid for radio device
 * nodes. Those are all of the V4L2_CID_AUDIO_* user controls and all FM
 * transmitter class controls.
 *
 * This function is to be used with v4l2_ctrl_add_handler().
 */
bool v4l2_ctrl_radio_filter(const struct v4l2_ctrl *ctrl);

/**
 * v4l2_ctrl_cluster() - Mark all controls in the cluster as belonging
 *	to that cluster.
 *
 * @ncontrols:	The number of controls in this cluster.
 * @controls:	The cluster control array of size @ncontrols.
 */
void v4l2_ctrl_cluster(unsigned int ncontrols, struct v4l2_ctrl **controls);

/**
 * v4l2_ctrl_auto_cluster() - Mark all controls in the cluster as belonging
 *	to that cluster and set it up for autofoo/foo-type handling.
 *
 * @ncontrols:	The number of controls in this cluster.
 * @controls:	The cluster control array of size @ncontrols. The first control
 *		must be the 'auto' control (e.g. autogain, autoexposure, etc.)
 * @manual_val: The value for the first control in the cluster that equals the
 *		manual setting.
 * @set_volatile: If true, then all controls except the first auto control will
 *		be volatile.
 *
 * Use for control groups where one control selects some automatic feature and
 * the other controls are only active whenever the automatic feature is turned
 * off (manual mode). Typical examples: autogain vs gain, auto-whitebalance vs
 * red and blue balance, etc.
 *
 * The behavior of such controls is as follows:
 *
 * When the autofoo control is set to automatic, then any manual controls
 * are set to inactive and any reads will call g_volatile_ctrl (if the control
 * was marked volatile).
 *
 * When the autofoo control is set to manual, then any manual controls will
 * be marked active, and any reads will just return the current value without
 * going through g_volatile_ctrl.
 *
 * In addition, this function will set the %V4L2_CTRL_FLAG_UPDATE flag
 * on the autofoo control and %V4L2_CTRL_FLAG_INACTIVE on the foo control(s)
 * if autofoo is in auto mode.
 */
void v4l2_ctrl_auto_cluster(unsigned int ncontrols,
			    struct v4l2_ctrl **controls,
			    u8 manual_val, bool set_volatile);
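/*
 * Example: an autogain/gain cluster (illustrative sketch; the ctrls[] array,
 * "state" and "foo_ctrl_ops" are assumptions). The first control must be
 * the 'auto' control; with set_volatile == true the gain control becomes
 * volatile while autogain is enabled. manual_val == 0 means autogain == 0
 * selects manual mode:
 *
 *	struct v4l2_ctrl *ctrls[2];
 *
 *	ctrls[0] = v4l2_ctrl_new_std(&state->hdl, &foo_ctrl_ops,
 *				     V4L2_CID_AUTOGAIN, 0, 1, 1, 1);
 *	ctrls[1] = v4l2_ctrl_new_std(&state->hdl, &foo_ctrl_ops,
 *				     V4L2_CID_GAIN, 0, 255, 1, 64);
 *	v4l2_ctrl_auto_cluster(2, ctrls, 0, true);
 */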
/**
 * v4l2_ctrl_find() - Find a control with the given ID.
 *
 * @hdl:	The control handler.
 * @id:	The control ID to find.
 *
 * If @hdl == NULL this will return NULL as well. Will lock the handler so
 * do not use from inside &v4l2_ctrl_ops.
 */
struct v4l2_ctrl *v4l2_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id);

/**
 * v4l2_ctrl_activate() - Make the control active or inactive.
 * @ctrl:	The control to (de)activate.
 * @active:	True if the control should become active.
 *
 * This sets or clears the V4L2_CTRL_FLAG_INACTIVE flag atomically.
 * Does nothing if @ctrl == NULL.
 * This will usually be called from within the s_ctrl op.
 * The V4L2_EVENT_CTRL event will be generated afterwards.
 *
 * This function assumes that the control handler is locked.
 */
void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active);

/**
 * __v4l2_ctrl_grab() - Unlocked variant of v4l2_ctrl_grab.
 *
 * @ctrl:	The control to (de)activate.
 * @grabbed:	True if the control should become grabbed.
 *
 * This sets or clears the V4L2_CTRL_FLAG_GRABBED flag atomically.
 * Does nothing if @ctrl == NULL.
 * The V4L2_EVENT_CTRL event will be generated afterwards.
 * This will usually be called when starting or stopping streaming in the
 * driver.
 *
 * This function assumes that the control handler is locked by the caller.
 */
void __v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed);

/**
 * v4l2_ctrl_grab() - Mark the control as grabbed or not grabbed.
 *
 * @ctrl:	The control to (de)activate.
 * @grabbed:	True if the control should become grabbed.
 *
 * This sets or clears the V4L2_CTRL_FLAG_GRABBED flag atomically.
 * Does nothing if @ctrl == NULL.
 * The V4L2_EVENT_CTRL event will be generated afterwards.
 * This will usually be called when starting or stopping streaming in the
 * driver.
 *
 * This function assumes that the control handler is not locked and will
 * take the lock itself.
 */
static inline void v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed)
{
	if (!ctrl)
		return;

	v4l2_ctrl_lock(ctrl);
	__v4l2_ctrl_grab(ctrl, grabbed);
	v4l2_ctrl_unlock(ctrl);
}

/**
 * __v4l2_ctrl_modify_range() - Unlocked variant of v4l2_ctrl_modify_range()
 *
 * @ctrl:	The control to update.
 * @min:	The control's minimum value.
 * @max:	The control's maximum value.
 * @step:	The control's step value.
 * @def:	The control's default value.
 *
 * Update the range of a control on the fly. This works for control types
 * INTEGER, BOOLEAN, MENU, INTEGER MENU and BITMASK. For menu controls the
 * @step value is interpreted as a menu_skip_mask.
 *
 * An error is returned if one of the range arguments is invalid for this
 * control type.
 *
 * The caller is responsible for acquiring the control handler mutex on behalf
 * of __v4l2_ctrl_modify_range().
 */
int __v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
			     s64 min, s64 max, u64 step, s64 def);

/**
 * v4l2_ctrl_modify_range() - Update the range of a control.
 *
 * @ctrl:	The control to update.
 * @min:	The control's minimum value.
 * @max:	The control's maximum value.
 * @step:	The control's step value.
 * @def:	The control's default value.
 *
 * Update the range of a control on the fly. This works for control types
 * INTEGER, BOOLEAN, MENU, INTEGER MENU and BITMASK. For menu controls the
 * @step value is interpreted as a menu_skip_mask.
 *
 * An error is returned if one of the range arguments is invalid for this
 * control type.
 *
 * This function assumes that the control handler is not locked and will
 * take the lock itself.
 */
static inline int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
					 s64 min, s64 max, u64 step, s64 def)
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_modify_range(ctrl, min, max, step, def);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}

/**
 * __v4l2_ctrl_modify_dimensions() - Unlocked variant of
 *	v4l2_ctrl_modify_dimensions()
 *
 * @ctrl:	The control to update.
 * @dims:	The control's new dimensions.
 *
 * Update the dimensions of an array control on the fly. The elements of the
 * array are reset to their default value, even if the dimensions are
 * unchanged.
 *
 * An error is returned if @dims is invalid for this control.
 *
 * The caller is responsible for acquiring the control handler mutex on behalf
 * of __v4l2_ctrl_modify_dimensions().
 *
 * Note: calling this function when the same control is used in pending requests
 * is untested. It should work (a request with the wrong size of the control
 * will drop that control silently), but it will be very confusing.
 */
int __v4l2_ctrl_modify_dimensions(struct v4l2_ctrl *ctrl,
				  u32 dims[V4L2_CTRL_MAX_DIMS]);
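/*
 * Example: updating the exposure range after a sensor mode change, from a
 * context where the handler is not yet locked (illustrative sketch;
 * "exposure_ctrl" and "new_max_exposure" are assumptions):
 *
 *	int err = v4l2_ctrl_modify_range(exposure_ctrl, 1, new_max_exposure,
 *					 1, new_max_exposure / 2);
 */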
/**
 * v4l2_ctrl_modify_dimensions() - Update the dimensions of an array control.
 *
 * @ctrl:	The control to update.
 * @dims:	The control's new dimensions.
 *
 * Update the dimensions of an array control on the fly. The elements of the
 * array are reset to their default value, even if the dimensions are
 * unchanged.
 *
 * An error is returned if @dims is invalid for this control type.
 *
 * This function assumes that the control handler is not locked and will
 * take the lock itself.
 *
 * Note: calling this function when the same control is used in pending requests
 * is untested. It should work (a request with the wrong size of the control
 * will drop that control silently), but it will be very confusing.
 */
static inline int v4l2_ctrl_modify_dimensions(struct v4l2_ctrl *ctrl,
					      u32 dims[V4L2_CTRL_MAX_DIMS])
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_modify_dimensions(ctrl, dims);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}

/**
 * v4l2_ctrl_notify() - Function to set a notify callback for a control.
 *
 * @ctrl:	The control.
 * @notify:	The callback function.
 * @priv:	The callback private handle, passed as argument to the callback.
 *
 * This function sets a callback function for the control. If @ctrl is NULL,
 * then it will do nothing. If @notify is NULL, then the notify callback will
 * be removed.
 *
 * There can be only one notify. If another already exists, then a WARN_ON
 * will be issued and the function will do nothing.
 */
void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, v4l2_ctrl_notify_fnc notify,
		      void *priv);

/**
 * v4l2_ctrl_get_name() - Get the name of the control
 *
 * @id:	The control ID.
 *
 * This function returns the name of the given control ID or NULL if it isn't
 * a known control.
 */
const char *v4l2_ctrl_get_name(u32 id);

/**
 * v4l2_ctrl_get_menu() - Get the menu string array of the control
 *
 * @id:	The control ID.
 *
 * This function returns the NULL-terminated menu string array of the
 * given control ID or NULL if it isn't a known menu control.
 */
const char * const *v4l2_ctrl_get_menu(u32 id);

/**
 * v4l2_ctrl_get_int_menu() - Get the integer menu array of the control
 *
 * @id:	The control ID.
 * @len:	The size of the integer array.
 *
 * This function returns the integer array of the given control ID or NULL
 * if it isn't a known integer menu control.
 */
const s64 *v4l2_ctrl_get_int_menu(u32 id, u32 *len);

/**
 * v4l2_ctrl_g_ctrl() - Helper function to get the control's value from
 *	within a driver.
 *
 * @ctrl:	The control.
 *
 * This returns the control's value safely by going through the control
 * framework. This function will lock the control's handler, so it cannot be
 * used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for integer type controls only.
 */
s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl);

/**
 * __v4l2_ctrl_s_ctrl() - Unlocked variant of v4l2_ctrl_s_ctrl().
 *
 * @ctrl:	The control.
 * @val:	The new value.
 *
 * This sets the control's new value safely by going through the control
 * framework. This function assumes the control's handler is already locked,
 * allowing it to be used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for integer type controls only.
 */
int __v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val);

/**
 * v4l2_ctrl_s_ctrl() - Helper function to set the control's value from
 *	within a driver.
 * @ctrl:	The control.
 * @val:	The new value.
 *
 * This sets the control's new value safely by going through the control
 * framework. This function will lock the control's handler, so it cannot be
 * used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for integer type controls only.
 */
static inline int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val)
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_s_ctrl(ctrl, val);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}
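/*
 * Example: reading and writing an integer control from driver code outside
 * of the &v4l2_ctrl_ops callbacks; both helpers take the handler lock
 * themselves (illustrative sketch; "state->gain_ctrl" is an assumption):
 *
 *	s32 gain = v4l2_ctrl_g_ctrl(state->gain_ctrl);
 *
 *	v4l2_ctrl_s_ctrl(state->gain_ctrl, gain / 2);
 */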
/**
 * v4l2_ctrl_g_ctrl_int64() - Helper function to get a 64-bit control's value
 *	from within a driver.
 *
 * @ctrl:	The control.
 *
 * This returns the control's value safely by going through the control
 * framework. This function will lock the control's handler, so it cannot be
 * used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for 64-bit integer type controls only.
 */
s64 v4l2_ctrl_g_ctrl_int64(struct v4l2_ctrl *ctrl);

/**
 * __v4l2_ctrl_s_ctrl_int64() - Unlocked variant of v4l2_ctrl_s_ctrl_int64().
 *
 * @ctrl:	The control.
 * @val:	The new value.
 *
 * This sets the control's new value safely by going through the control
 * framework. This function assumes the control's handler is already locked,
 * allowing it to be used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for 64-bit integer type controls only.
 */
int __v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val);

/**
 * v4l2_ctrl_s_ctrl_int64() - Helper function to set a 64-bit control's value
 *	from within a driver.
 *
 * @ctrl:	The control.
 * @val:	The new value.
 *
 * This sets the control's new value safely by going through the control
 * framework. This function will lock the control's handler, so it cannot be
 * used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for 64-bit integer type controls only.
 */
static inline int v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val)
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_s_ctrl_int64(ctrl, val);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}

/**
 * __v4l2_ctrl_s_ctrl_string() - Unlocked variant of v4l2_ctrl_s_ctrl_string().
 *
 * @ctrl:	The control.
 * @s:	The new string.
 *
 * This sets the control's new string safely by going through the control
 * framework. This function assumes the control's handler is already locked,
 * allowing it to be used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for string type controls only.
 */
int __v4l2_ctrl_s_ctrl_string(struct v4l2_ctrl *ctrl, const char *s);

/**
 * v4l2_ctrl_s_ctrl_string() - Helper function to set a control's string value
 *	from within a driver.
 *
 * @ctrl:	The control.
 * @s:	The new string.
 *
 * This sets the control's new string safely by going through the control
 * framework. This function will lock the control's handler, so it cannot be
 * used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for string type controls only.
 */
static inline int v4l2_ctrl_s_ctrl_string(struct v4l2_ctrl *ctrl, const char *s)
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_s_ctrl_string(ctrl, s);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}

/**
 * __v4l2_ctrl_s_ctrl_compound() - Unlocked variant to set a compound control
 *
 * @ctrl: The control.
 * @type: The type of the data.
 * @p: The new compound payload.
 *
 * This sets the control's new compound payload safely by going through the
 * control framework. This function assumes the control's handler is already
 * locked, allowing it to be used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for compound type controls only.
 */
int __v4l2_ctrl_s_ctrl_compound(struct v4l2_ctrl *ctrl,
				enum v4l2_ctrl_type type, const void *p);

/**
 * v4l2_ctrl_s_ctrl_compound() - Helper function to set a compound control
 *	from within a driver.
 *
 * @ctrl: The control.
 * @type: The type of the data.
 * @p: The new compound payload.
 *
 * This sets the control's new compound payload safely by going through the
 * control framework. This function will lock the control's handler, so it
 * cannot be used from within the &v4l2_ctrl_ops functions.
 *
 * This function is for compound type controls only.
 */
static inline int v4l2_ctrl_s_ctrl_compound(struct v4l2_ctrl *ctrl,
					    enum v4l2_ctrl_type type,
					    const void *p)
{
	int rval;

	v4l2_ctrl_lock(ctrl);
	rval = __v4l2_ctrl_s_ctrl_compound(ctrl, type, p);
	v4l2_ctrl_unlock(ctrl);

	return rval;
}

/* Helper defines for area type controls */
#define __v4l2_ctrl_s_ctrl_area(ctrl, area)			\
	__v4l2_ctrl_s_ctrl_compound((ctrl), V4L2_CTRL_TYPE_AREA, (area))
#define v4l2_ctrl_s_ctrl_area(ctrl, area)			\
	v4l2_ctrl_s_ctrl_compound((ctrl), V4L2_CTRL_TYPE_AREA, (area))

/* Internal helper functions that deal with control events. */
extern const struct v4l2_subscribed_event_ops v4l2_ctrl_sub_ev_ops;

/**
 * v4l2_ctrl_replace - Function to be used as a callback to
 *	&struct v4l2_subscribed_event_ops replace\(\)
 *
 * @old: pointer to struct &v4l2_event with the reported
 *	 event;
 * @new: pointer to struct &v4l2_event with the modified
 *	 event;
 */
void v4l2_ctrl_replace(struct v4l2_event *old, const struct v4l2_event *new);

/**
 * v4l2_ctrl_merge - Function to be used as a callback to
 *	&struct v4l2_subscribed_event_ops merge\(\)
 *
 * @old: pointer to struct &v4l2_event with the reported
 *	 event;
 * @new: pointer to struct &v4l2_event with the merged
 *	 event;
 */
void v4l2_ctrl_merge(const struct v4l2_event *old, struct v4l2_event *new);

/**
 * v4l2_ctrl_log_status - helper function to implement %VIDIOC_LOG_STATUS ioctl
 *
 * @file: pointer to struct file
 * @fh: unused. Kept just to be compatible to the arguments expected by
 *	&struct v4l2_ioctl_ops.vidioc_log_status.
 *
 * Can be used as a vidioc_log_status function that just dumps all controls
 * associated with the filehandle.
 */
int v4l2_ctrl_log_status(struct file *file, void *fh);

/**
 * v4l2_ctrl_subscribe_event - Subscribes to an event
 *
 * @fh: pointer to struct v4l2_fh
 * @sub: pointer to &struct v4l2_event_subscription
 *
 * Can be used as a vidioc_subscribe_event function that just subscribes
 * control events.
 */
int v4l2_ctrl_subscribe_event(struct v4l2_fh *fh,
			      const struct v4l2_event_subscription *sub);

/**
 * v4l2_ctrl_poll - function to be used as a callback to the poll()
 *	that just polls for control events.
 *
 * @file: pointer to struct file
 * @wait: pointer to struct poll_table_struct
 */
__poll_t v4l2_ctrl_poll(struct file *file, struct poll_table_struct *wait);

/**
 * v4l2_ctrl_request_setup - helper function to apply control values in a request
 *
 * @req: The request
 * @parent: The parent control handler ('priv' in media_request_object_find())
 *
 * This is a helper function to call the control handler's s_ctrl callback with
 * the control values contained in the request. Do note that this approach of
 * applying control values in a request is only applicable to memory-to-memory
 * devices.
 */
int v4l2_ctrl_request_setup(struct media_request *req,
			    struct v4l2_ctrl_handler *parent);
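/*
 * Example: applying the controls of a queued request from a mem2mem driver's
 * device_run() callback. This is an illustrative sketch; "ctx", its "hdl"
 * member and the use of v4l2_m2m_next_src_buf() are assumptions about the
 * driver's structure:
 *
 *	struct vb2_v4l2_buffer *src_buf =
 *		v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
 *
 *	v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
 *	// ... program the hardware from the applied control values ...
 *	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
 */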
/**
 * v4l2_ctrl_request_complete - Complete a control handler request object
 *
 * @req: The request
 * @parent: The parent control handler ('priv' in media_request_object_find())
 *
 * This function is to be called on each control handler that may have had a
 * request object associated with it, i.e. control handlers of a driver that
 * supports requests.
 *
 * The function first obtains the values of any volatile controls in the control
 * handler and attaches them to the request. Then, the function completes the
 * request object.
 */
void v4l2_ctrl_request_complete(struct media_request *req,
				struct v4l2_ctrl_handler *parent);

/**
 * v4l2_ctrl_request_hdl_find - Find the control handler in the request
 *
 * @req: The request
 * @parent: The parent control handler ('priv' in media_request_object_find())
 *
 * This function finds the control handler in the request. It may return
 * NULL if not found. When done, you must call v4l2_ctrl_request_hdl_put()
 * with the returned handler pointer.
 *
 * If the request is not in state VALIDATING or QUEUED, then this function
 * will always return NULL.
 *
 * Note that in state VALIDATING the req_queue_mutex is held, so
 * no objects can be added or deleted from the request.
 *
 * In state QUEUED it is the driver that will have to ensure this.
 */
struct v4l2_ctrl_handler *v4l2_ctrl_request_hdl_find(struct media_request *req,
					struct v4l2_ctrl_handler *parent);

/**
 * v4l2_ctrl_request_hdl_put - Put the control handler
 *
 * @hdl: Put this control handler
 *
 * This function releases the control handler previously obtained from
 * v4l2_ctrl_request_hdl_find().
 */
static inline void v4l2_ctrl_request_hdl_put(struct v4l2_ctrl_handler *hdl)
{
	if (hdl)
		media_request_object_put(&hdl->req_obj);
}

/**
 * v4l2_ctrl_request_hdl_ctrl_find() - Find a control with the given ID.
 *
 * @hdl: The control handler from the request.
 * @id: The ID of the control to find.
 *
 * This function returns a pointer to the control if this control is
 * part of the request or NULL otherwise.
 */
struct v4l2_ctrl *
v4l2_ctrl_request_hdl_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id);
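/*
 * Example: looking up a control in a request, e.g. when validating a queued
 * request in a stateless codec driver. This is an illustrative sketch; the
 * SPS control ID and "ctx" are assumptions. The handler returned by
 * v4l2_ctrl_request_hdl_find() must be released again with
 * v4l2_ctrl_request_hdl_put():
 *
 *	struct v4l2_ctrl_handler *hdl;
 *	struct v4l2_ctrl *ctrl;
 *
 *	hdl = v4l2_ctrl_request_hdl_find(req, &ctx->hdl);
 *	if (!hdl)
 *		return -ENOENT;
 *	ctrl = v4l2_ctrl_request_hdl_ctrl_find(hdl,
 *					       V4L2_CID_STATELESS_H264_SPS);
 *	v4l2_ctrl_request_hdl_put(hdl);
 *	return ctrl ? 0 : -ENOENT;
 */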
/* Helpers for ioctl_ops */

/**
 * v4l2_queryctrl - Helper function to implement
 *	:ref:`VIDIOC_QUERYCTRL <vidioc_queryctrl>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @qc: pointer to &struct v4l2_queryctrl
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_queryctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_queryctrl *qc);

/**
 * v4l2_query_ext_ctrl_to_v4l2_queryctrl - Convert a &struct
 *	v4l2_query_ext_ctrl into a &struct v4l2_queryctrl.
 *
 * @to: The v4l2_queryctrl to write to.
 * @from: The v4l2_query_ext_ctrl to read from.
 *
 * This function is a helper to convert a v4l2_query_ext_ctrl into a
 * v4l2_queryctrl.
 */
void v4l2_query_ext_ctrl_to_v4l2_queryctrl(struct v4l2_queryctrl *to,
					   const struct v4l2_query_ext_ctrl *from);

/**
 * v4l2_query_ext_ctrl - Helper function to implement
 *	:ref:`VIDIOC_QUERY_EXT_CTRL <vidioc_queryctrl>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @qc: pointer to &struct v4l2_query_ext_ctrl
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl,
			struct v4l2_query_ext_ctrl *qc);

/**
 * v4l2_querymenu - Helper function to implement
 *	:ref:`VIDIOC_QUERYMENU <vidioc_queryctrl>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @qm: pointer to &struct v4l2_querymenu
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_querymenu(struct v4l2_ctrl_handler *hdl, struct v4l2_querymenu *qm);

/**
 * v4l2_g_ctrl - Helper function to implement
 *	:ref:`VIDIOC_G_CTRL <vidioc_g_ctrl>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @ctrl: pointer to &struct v4l2_control
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *ctrl);

/**
 * v4l2_s_ctrl - Helper function to implement
 *	:ref:`VIDIOC_S_CTRL <vidioc_g_ctrl>` ioctl
 *
 * @fh: pointer to &struct v4l2_fh
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @ctrl: pointer to &struct v4l2_control
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
		struct v4l2_control *ctrl);

/**
 * v4l2_g_ext_ctrls - Helper function to implement
 *	:ref:`VIDIOC_G_EXT_CTRLS <vidioc_g_ext_ctrls>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @vdev: pointer to &struct video_device
 * @mdev: pointer to &struct media_device
 * @c: pointer to &struct v4l2_ext_controls
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
		     struct media_device *mdev, struct v4l2_ext_controls *c);

/**
 * v4l2_try_ext_ctrls - Helper function to implement
 *	:ref:`VIDIOC_TRY_EXT_CTRLS <vidioc_g_ext_ctrls>` ioctl
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @vdev: pointer to &struct video_device
 * @mdev: pointer to &struct media_device
 * @c: pointer to &struct v4l2_ext_controls
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
		       struct video_device *vdev,
		       struct media_device *mdev,
		       struct v4l2_ext_controls *c);

/**
 * v4l2_s_ext_ctrls - Helper function to implement
 *	:ref:`VIDIOC_S_EXT_CTRLS <vidioc_g_ext_ctrls>` ioctl
 *
 * @fh: pointer to &struct v4l2_fh
 * @hdl: pointer to &struct v4l2_ctrl_handler
 * @vdev: pointer to &struct video_device
 * @mdev: pointer to &struct media_device
 * @c: pointer to &struct v4l2_ext_controls
 *
 * If hdl == NULL then they will all return -EINVAL.
 */
int v4l2_s_ext_ctrls(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
		     struct video_device *vdev,
		     struct media_device *mdev,
		     struct v4l2_ext_controls *c);

/**
 * v4l2_ctrl_subdev_subscribe_event - Helper function to implement
 *	as a &struct v4l2_subdev_core_ops subscribe_event function
 *	that just subscribes control events.
 *
 * @sd: pointer to &struct v4l2_subdev
 * @fh: pointer to &struct v4l2_fh
 * @sub: pointer to &struct v4l2_event_subscription
 */
int v4l2_ctrl_subdev_subscribe_event(struct v4l2_subdev *sd,
				     struct v4l2_fh *fh,
				     struct v4l2_event_subscription *sub);

/**
 * v4l2_ctrl_subdev_log_status - Log all controls owned by subdev's control
 *	handler.
 *
 * @sd: pointer to &struct v4l2_subdev
 */
int v4l2_ctrl_subdev_log_status(struct v4l2_subdev *sd);

/**
 * v4l2_ctrl_new_fwnode_properties() - Register controls for the device
 *				       properties
 *
 * @hdl: pointer to &struct v4l2_ctrl_handler to register controls on
 * @ctrl_ops: pointer to &struct v4l2_ctrl_ops to register controls with
 * @p: pointer to &struct v4l2_fwnode_device_properties
 *
 * This function registers controls associated to device properties, using the
 * property values contained in the @p parameter, if the property has been set
 * to a value.
 *
 * Currently the following v4l2 controls are parsed and registered:
 * - V4L2_CID_CAMERA_ORIENTATION
 * - V4L2_CID_CAMERA_SENSOR_ROTATION
 *
 * Controls already registered by the caller with the @hdl control handler are
 * not overwritten. Callers should register the controls they want to handle
 * themselves before calling this function.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int v4l2_ctrl_new_fwnode_properties(struct v4l2_ctrl_handler *hdl,
				    const struct v4l2_ctrl_ops *ctrl_ops,
				    const struct v4l2_fwnode_device_properties *p);

/**
 * v4l2_ctrl_type_op_equal - Default v4l2_ctrl_type_ops equal callback.
 *
 * @ctrl: The v4l2_ctrl pointer.
 * @ptr1: A v4l2 control value.
 * @ptr2: A v4l2 control value.
 *
 * Return: true if values are equal, otherwise false.
*/ bool v4l2_ctrl_type_op_equal(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr ptr1, union v4l2_ctrl_ptr ptr2); /** * v4l2_ctrl_type_op_init - Default v4l2_ctrl_type_ops init callback. * * @ctrl: The v4l2_ctrl pointer. * @from_idx: Starting element index. * @ptr: The v4l2 control value. * * Return: void */ void v4l2_ctrl_type_op_init(const struct v4l2_ctrl *ctrl, u32 from_idx, union v4l2_ctrl_ptr ptr); /** * v4l2_ctrl_type_op_log - Default v4l2_ctrl_type_ops log callback. * * @ctrl: The v4l2_ctrl pointer. * * Return: void */ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl); /** * v4l2_ctrl_type_op_validate - Default v4l2_ctrl_type_ops validate callback. * * @ctrl: The v4l2_ctrl pointer. * @ptr: The v4l2 control value. * * Return: 0 on success, a negative error code on failure. */ int v4l2_ctrl_type_op_validate(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr ptr); #endif
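Taken together, the three request-handler helpers above form a find/use/put pattern. Below is a minimal caller-side sketch of that pattern, not taken from this header: the my_dev struct, the function name, and the choice of V4L2_CID_BRIGHTNESS are illustrative assumptions, and the p_cur payload access mirrors the convention used by stateless-decoder drivers rather than anything this header mandates.

#include <media/media-request.h>
#include <media/v4l2-ctrls.h>

struct my_dev {				/* hypothetical driver state */
	struct v4l2_ctrl_handler ctrl_handler;
};

static int my_dev_read_request_ctrl(struct my_dev *dev,
				    struct media_request *req, s32 *out)
{
	struct v4l2_ctrl_handler *hdl;
	struct v4l2_ctrl *ctrl;
	int ret = 0;

	/* Succeeds only while the request is VALIDATING or QUEUED */
	hdl = v4l2_ctrl_request_hdl_find(req, &dev->ctrl_handler);
	if (!hdl)
		return -ENOENT;

	ctrl = v4l2_ctrl_request_hdl_ctrl_find(hdl, V4L2_CID_BRIGHTNESS);
	if (ctrl)
		*out = *ctrl->p_cur.p_s32;	/* payload access is type-dependent */
	else
		ret = -ENOENT;

	/* Every successful _hdl_find() must be balanced by a _hdl_put() */
	v4l2_ctrl_request_hdl_put(hdl);
	return ret;
}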
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/busy_poll.h>
#include <net/net_namespace.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
#include <net/page_pool/memory_provider.h>

#include "dev.h"
#include "devmem.h"
#include "netdev-genl-gen.h"

struct netdev_nl_dump_ctx {
	unsigned long	ifindex;
	unsigned int	rxq_idx;
	unsigned int	txq_idx;
	unsigned int	napi_id;
};

static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
{
	NL_ASSERT_CTX_FITS(struct netdev_nl_dump_ctx);

	return (struct netdev_nl_dump_ctx *)cb->ctx;
}

static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
		   const struct genl_info *info)
{
	u64 xsk_features = 0;
	u64 xdp_rx_meta = 0;
	void *hdr;

	netdev_assert_locked(netdev); /* note: rtnl_lock may not be held!
*/ hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; #define XDP_METADATA_KFUNC(_, flag, __, xmo) \ if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ xdp_rx_meta |= flag; XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC if (netdev->xsk_tx_metadata_ops) { if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; if (netdev->xsk_tx_metadata_ops->tmo_request_launch_time) xsk_features |= NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO; } if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, xdp_rx_meta, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, xsk_features, NETDEV_A_DEV_PAD)) goto err_cancel_msg; if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) { if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, netdev->xdp_zc_max_segs)) goto err_cancel_msg; } genlmsg_end(rsp, hdr); return 0; err_cancel_msg: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static void netdev_genl_dev_notify(struct net_device *netdev, int cmd) { struct genl_info info; struct sk_buff *ntf; if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev), NETDEV_NLGRP_MGMT)) return; genl_info_init_ntf(&info, &netdev_nl_family, cmd); ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!ntf) return; if (netdev_nl_dev_fill(netdev, ntf, &info)) { nlmsg_free(ntf); return; } genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf, 0, NETDEV_NLGRP_MGMT, GFP_KERNEL); } int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct net_device *netdev; struct sk_buff *rsp; u32 ifindex; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (!netdev) { err = -ENODEV; goto err_free_msg; } err = netdev_nl_dev_fill(netdev, rsp, info); netdev_unlock(netdev); if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); struct net *net = sock_net(skb->sk); int err; for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); if (err < 0) return err; } return 0; } static int netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, const struct genl_info *info) { unsigned long irq_suspend_timeout; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; void *hdr; pid_t pid; if (!napi->dev->up) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) goto nla_put_failure; if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) goto nla_put_failure; if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) goto nla_put_failure; } napi_defer_hard_irqs = napi_get_defer_hard_irqs(napi); if (nla_put_s32(rsp, NETDEV_A_NAPI_DEFER_HARD_IRQS, napi_defer_hard_irqs)) goto nla_put_failure; irq_suspend_timeout = 
napi_get_irq_suspend_timeout(napi); if (nla_put_uint(rsp, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, irq_suspend_timeout)) goto nla_put_failure; gro_flush_timeout = napi_get_gro_flush_timeout(napi); if (nla_put_uint(rsp, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, gro_flush_timeout)) goto nla_put_failure; genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) { struct napi_struct *napi; struct sk_buff *rsp; u32 napi_id; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) return -EINVAL; napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_fill_one(rsp, napi, info); netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } if (err) { goto err_free_msg; } else if (!rsp->len) { err = -ENOENT; goto err_free_msg; } return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { struct napi_struct *napi; unsigned int prev_id; int err = 0; if (!netdev->up) return err; prev_id = UINT_MAX; list_for_each_entry(napi, &netdev->napi_list, dev_list) { if (!napi_id_valid(napi->napi_id)) continue; /* Dump continuation below depends on the list being sorted */ WARN_ON_ONCE(napi->napi_id >= prev_id); prev_id = napi->napi_id; if (ctx->napi_id && napi->napi_id >= ctx->napi_id) continue; err = netdev_nl_napi_fill_one(rsp, napi, info); if (err) return err; ctx->napi_id = napi->napi_id; } return err; } int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_NAPI_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); if (ifindex) { netdev = netdev_get_by_index_lock(net, ifindex); if (netdev) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); netdev_unlock(netdev); } else { err = -ENODEV; } } else { for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->napi_id = 0; } } return err; } static int netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; u32 defer = 0; if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); napi_set_defer_hard_irqs(napi, defer); } if (info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]) { irq_suspend_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT]); napi_set_irq_suspend_timeout(napi, irq_suspend_timeout); } if (info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]) { gro_flush_timeout = nla_get_uint(info->attrs[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT]); napi_set_gro_flush_timeout(napi, gro_flush_timeout); } return 0; } int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) { struct napi_struct *napi; unsigned int napi_id; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) return -EINVAL; napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); napi = netdev_napi_by_id_lock(genl_info_net(info), 
napi_id); if (napi) { err = netdev_nl_napi_set_config(napi, info); netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } return err; } static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) { if (napi && napi_id_valid(napi->napi_id)) return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); return 0; } static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct pp_memory_provider_params *params; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; params = &rxq->mp_params; if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; #ifdef CONFIG_XDP_SOCKETS if (rxq->pool) if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) goto nla_put_failure; #endif break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; #ifdef CONFIG_XDP_SOCKETS if (txq->pool) if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) goto nla_put_failure; #endif break; } genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, u32 q_type) { switch (q_type) { case NETDEV_QUEUE_TYPE_RX: if (q_id >= netdev->real_num_rx_queues) return -EINVAL; return 0; case NETDEV_QUEUE_TYPE_TX: if (q_id >= netdev->real_num_tx_queues) return -EINVAL; } return 0; } static int netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { int err; if (!netdev->up) return -ENOENT; err = netdev_nl_queue_validate(netdev, q_idx, q_type); if (err) return err; return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); } int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) { u32 q_id, q_type, ifindex; struct net_device *netdev; struct sk_buff *rsp; int err; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) return -EINVAL; q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info), ifindex); if (netdev) { err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); netdev_unlock_ops_compat(netdev); } else { err = -ENODEV; } if (err) goto err_free_msg; return genlmsg_reply(rsp, info); err_free_msg: nlmsg_free(rsp); return err; } static int netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { int err = 0; if (!netdev->up) return err; for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx, NETDEV_QUEUE_TYPE_RX, info); if (err) return err; } for (; ctx->txq_idx < 
netdev->real_num_tx_queues; ctx->txq_idx++) { err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx, NETDEV_QUEUE_TYPE_TX, info); if (err) return err; } return err; } int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; u32 ifindex = 0; int err = 0; if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); if (ifindex) { netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); if (netdev) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); netdev_unlock_ops_compat(netdev); } else { err = -ENODEV; } } else { for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->rxq_idx = 0; ctx->txq_idx = 0; } } return err; } #define NETDEV_STAT_NOT_SET (~0ULL) static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) { const u64 *add = _add; u64 *sum = _sum; while (size) { if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) *sum += *add; sum++; add++; size -= 8; } } static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) { if (value == NETDEV_STAT_NOT_SET) return 0; return nla_put_uint(rsp, attr_id, value); } static int netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || netdev_stat_put(rsp, 
NETDEV_A_QSTATS_TX_STOP, tx->stop) || netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) return -EMSGSIZE; return 0; } static int netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, u32 q_type, int i, const struct genl_info *info) { const struct netdev_stat_ops *ops = netdev->stat_ops; struct netdev_queue_stats_rx rx; struct netdev_queue_stats_tx tx; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) goto nla_put_failure; switch (q_type) { case NETDEV_QUEUE_TYPE_RX: memset(&rx, 0xff, sizeof(rx)); ops->get_queue_stats_rx(netdev, i, &rx); if (!memchr_inv(&rx, 0xff, sizeof(rx))) goto nla_cancel; if (netdev_nl_stats_write_rx(rsp, &rx)) goto nla_put_failure; break; case NETDEV_QUEUE_TYPE_TX: memset(&tx, 0xff, sizeof(tx)); ops->get_queue_stats_tx(netdev, i, &tx); if (!memchr_inv(&tx, 0xff, sizeof(tx))) goto nla_cancel; if (netdev_nl_stats_write_tx(rsp, &tx)) goto nla_put_failure; break; } genlmsg_end(rsp, hdr); return 0; nla_cancel: genlmsg_cancel(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { const struct netdev_stat_ops *ops = netdev->stat_ops; int i, err; if (!(netdev->flags & IFF_UP)) return 0; i = ctx->rxq_idx; while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, i, info); if (err) return err; ctx->rxq_idx = ++i; } i = ctx->txq_idx; while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, i, info); if (err) return err; ctx->txq_idx = ++i; } ctx->rxq_idx = 0; ctx->txq_idx = 0; return 0; } /** * netdev_stat_queue_sum() - add up queue stats from range of queues * @netdev: net_device * @rx_start: index of the first Rx queue to query * @rx_end: index after the last Rx queue (first *not* to query) * @rx_sum: output Rx stats, should be already initialized * @tx_start: index of the first Tx queue to query * @tx_end: index after the last Tx queue (first *not* to query) * @tx_sum: output Tx stats, should be already initialized * * Add stats from [start, end) range of queue IDs to *x_sum structs. * The sum structs must be already initialized. Usually this * helper is invoked from the .get_base_stats callbacks of drivers * to account for stats of disabled queues. In that case the ranges * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). 
*/ void netdev_stat_queue_sum(struct net_device *netdev, int rx_start, int rx_end, struct netdev_queue_stats_rx *rx_sum, int tx_start, int tx_end, struct netdev_queue_stats_tx *tx_sum) { const struct netdev_stat_ops *ops; struct netdev_queue_stats_rx rx; struct netdev_queue_stats_tx tx; int i; ops = netdev->stat_ops; for (i = rx_start; i < rx_end; i++) { memset(&rx, 0xff, sizeof(rx)); if (ops->get_queue_stats_rx) ops->get_queue_stats_rx(netdev, i, &rx); netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); } for (i = tx_start; i < tx_end; i++) { memset(&tx, 0xff, sizeof(tx)); if (ops->get_queue_stats_tx) ops->get_queue_stats_tx(netdev, i, &tx); netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); } } EXPORT_SYMBOL(netdev_stat_queue_sum); static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { struct netdev_queue_stats_rx rx_sum; struct netdev_queue_stats_tx tx_sum; void *hdr; /* Netdev can't guarantee any complete counters */ if (!netdev->stat_ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) return 0; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, 0, netdev->real_num_tx_queues, &tx_sum); if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) goto nla_put_failure; genlmsg_end(rsp, hdr); return 0; nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; } static int netdev_nl_qstats_get_dump_one(struct net_device *netdev, unsigned int scope, struct sk_buff *skb, const struct genl_info *info, struct netdev_nl_dump_ctx *ctx) { if (!netdev->stat_ops) return 0; switch (scope) { case 0: return netdev_nl_stats_by_netdev(netdev, skb, info); case NETDEV_QSTATS_SCOPE_QUEUE: return netdev_nl_stats_by_queue(netdev, skb, info, ctx); } return -EINVAL; /* Should not happen, per netlink policy */ } int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); const struct genl_info *info = genl_info_dump(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; unsigned int ifindex; unsigned int scope; int err = 0; scope = 0; if (info->attrs[NETDEV_A_QSTATS_SCOPE]) scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); ifindex = 0; if (info->attrs[NETDEV_A_QSTATS_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]); if (ifindex) { netdev = netdev_get_by_index_lock_ops_compat(net, ifindex); if (!netdev) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QSTATS_IFINDEX]); return -ENODEV; } if (netdev->stat_ops) { err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, info, ctx); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QSTATS_IFINDEX]); err = -EOPNOTSUPP; } netdev_unlock_ops_compat(netdev); return err; } for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_qstats_get_dump_one(netdev, scope, skb, info, ctx); if (err < 0) break; } return err; } int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; struct net_devmem_dmabuf_binding *binding; u32 ifindex, dmabuf_fd, 
rxq_idx; struct netdev_nl_sock *priv; struct net_device *netdev; struct sk_buff *rsp; struct nlattr *attr; int rem, err = 0; void *hdr; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); if (IS_ERR(priv)) return PTR_ERR(priv); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; hdr = genlmsg_iput(rsp, info); if (!hdr) { err = -EMSGSIZE; goto err_genlmsg_free; } mutex_lock(&priv->lock); err = 0; netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (!netdev) { err = -ENODEV; goto err_unlock_sock; } if (!netif_device_present(netdev)) err = -ENODEV; else if (!netdev_need_ops_lock(netdev)) err = -EOPNOTSUPP; if (err) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_DEV_IFINDEX]); goto err_unlock; } binding = net_devmem_bind_dmabuf(netdev, DMA_FROM_DEVICE, dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); goto err_unlock; } nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, genlmsg_data(info->genlhdr), genlmsg_len(info->genlhdr), rem) { err = nla_parse_nested( tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr, netdev_queue_id_nl_policy, info->extack); if (err < 0) goto err_unbind; if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) { err = -EINVAL; goto err_unbind; } if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); err = -EINVAL; goto err_unbind; } rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, info->extack); if (err) goto err_unbind; } nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); err = genlmsg_reply(rsp, info); if (err) goto err_unbind; netdev_unlock(netdev); mutex_unlock(&priv->lock); return 0; err_unbind: net_devmem_unbind_dmabuf(binding); err_unlock: netdev_unlock(netdev); err_unlock_sock: mutex_unlock(&priv->lock); err_genlmsg_free: nlmsg_free(rsp); return err; } int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) { struct net_devmem_dmabuf_binding *binding; struct netdev_nl_sock *priv; struct net_device *netdev; u32 ifindex, dmabuf_fd; struct sk_buff *rsp; int err = 0; void *hdr; if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD)) return -EINVAL; ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk); if (IS_ERR(priv)) return PTR_ERR(priv); rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!rsp) return -ENOMEM; hdr = genlmsg_iput(rsp, info); if (!hdr) { err = -EMSGSIZE; goto err_genlmsg_free; } mutex_lock(&priv->lock); netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); if (!netdev) { err = -ENODEV; goto err_unlock_sock; } if (!netif_device_present(netdev)) { err = -ENODEV; goto err_unlock_netdev; } if (!netdev->netmem_tx) { err = -EOPNOTSUPP; NL_SET_ERR_MSG(info->extack, "Driver does not support netmem TX"); goto err_unlock_netdev; } binding = net_devmem_bind_dmabuf(netdev, DMA_TO_DEVICE, dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { err = 
PTR_ERR(binding); goto err_unlock_netdev; } nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); genlmsg_end(rsp, hdr); netdev_unlock(netdev); mutex_unlock(&priv->lock); return genlmsg_reply(rsp, info); err_unlock_netdev: netdev_unlock(netdev); err_unlock_sock: mutex_unlock(&priv->lock); err_genlmsg_free: nlmsg_free(rsp); return err; } void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); mutex_init(&priv->lock); } void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; netdevice_tracker dev_tracker; struct net_device *dev; mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { mutex_lock(&binding->lock); dev = binding->dev; if (!dev) { mutex_unlock(&binding->lock); net_devmem_unbind_dmabuf(binding); continue; } netdev_hold(dev, &dev_tracker, GFP_KERNEL); mutex_unlock(&binding->lock); netdev_lock(dev); net_devmem_unbind_dmabuf(binding); netdev_unlock(dev); netdev_put(dev, &dev_tracker); } mutex_unlock(&priv->lock); } static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_REGISTER: netdev_lock_ops_to_full(netdev); netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF); netdev_unlock_full_to_ops(netdev); break; case NETDEV_UNREGISTER: netdev_lock(netdev); netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF); netdev_unlock(netdev); break; case NETDEV_XDP_FEAT_CHANGE: netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF); break; } return NOTIFY_OK; } static struct notifier_block netdev_genl_nb = { .notifier_call = netdev_genl_netdevice_event, }; static int __init netdev_genl_init(void) { int err; err = register_netdevice_notifier(&netdev_genl_nb); if (err) return err; err = genl_register_family(&netdev_nl_family); if (err) goto err_unreg_ntf; return 0; err_unreg_ntf: unregister_netdevice_notifier(&netdev_genl_nb); return err; } subsys_initcall(netdev_genl_init);
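The kernel-doc for netdev_stat_queue_sum() above names its usual call site: a driver's .get_base_stats callback, summing over the disabled-queue range [real_num_*x_queues, num_*x_queues). The following is a minimal sketch of such a callback; the my_drv names and the choice of which counters to report are illustrative assumptions, not an existing driver.

#include <linux/netdevice.h>
#include <net/netdev_queues.h>

static void my_drv_get_queue_stats_rx(struct net_device *dev, int idx,
				      struct netdev_queue_stats_rx *rx)
{
	/* Illustrative: report per-queue counters kept by the driver */
	rx->packets = 0;	/* e.g. read from a per-queue ring struct */
	rx->bytes = 0;
}

static void my_drv_get_queue_stats_tx(struct net_device *dev, int idx,
				      struct netdev_queue_stats_tx *tx)
{
	tx->packets = 0;
	tx->bytes = 0;
}

static void my_drv_get_base_stats(struct net_device *dev,
				  struct netdev_queue_stats_rx *rx,
				  struct netdev_queue_stats_tx *tx)
{
	/*
	 * Initialize only the counters this driver tracks; fields left
	 * at ~0 are treated as NETDEV_STAT_NOT_SET by the core (see
	 * netdev_nl_stats_by_netdev() above, which pre-fills with 0xff).
	 */
	rx->packets = 0;
	rx->bytes = 0;
	tx->packets = 0;
	tx->bytes = 0;

	/* Fold in queues that are currently disabled but were counted */
	netdev_stat_queue_sum(dev,
			      dev->real_num_rx_queues, dev->num_rx_queues, rx,
			      dev->real_num_tx_queues, dev->num_tx_queues, tx);
}

static const struct netdev_stat_ops my_drv_stat_ops = {
	.get_queue_stats_rx	= my_drv_get_queue_stats_rx,
	.get_queue_stats_tx	= my_drv_get_queue_stats_tx,
	.get_base_stats		= my_drv_get_base_stats,
};

A driver would point netdev->stat_ops at such an ops table at probe time; netdev_nl_qstats_get_dumpit() above skips devices where stat_ops is NULL.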
// SPDX-License-Identifier: GPL-2.0
/*
 * io_misc.c - fallocate, fpunch, truncate:
 */

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
#include "inode.h"
#include "io_misc.h"
#include "io_write.h"
#include "logged_ops.h"
#include "rebalance.h"
#include "subvolume.h"

/* Overwrites whatever was present with zeroes: */
int bch2_extent_fallocate(struct btree_trans *trans,
			  subvol_inum inum,
			  struct btree_iter *iter,
			  u64 sectors,
			  struct bch_io_opts opts,
			  s64 *i_sectors_delta,
			  struct write_point_specifier write_point)
{
	struct bch_fs *c = trans->c;
	struct disk_reservation disk_res = { 0 };
	struct closure cl;
	struct open_buckets open_buckets = { 0 };
	struct bkey_s_c k;
	struct bkey_buf old, new;
	unsigned sectors_allocated = 0, new_replicas;
	bool unwritten = opts.nocow &&
	    c->sb.version >= bcachefs_metadata_version_unwritten_extents;
	int ret;

	bch2_bkey_buf_init(&old);
	bch2_bkey_buf_init(&new);
	closure_init_stack(&cl);

	k = bch2_btree_iter_peek_slot(trans, iter);
	ret = bkey_err(k);
	if (ret)
		return ret;

	sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset);

	new_replicas = max(0, (int) opts.data_replicas -
			   (int) bch2_bkey_nr_ptrs_fully_allocated(k));

	/*
	 * Get a disk
reservation before (in the nocow case) calling * into the allocator: */ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); if (unlikely(ret)) goto err_noprint; bch2_bkey_buf_reassemble(&old, c, k); if (!unwritten) { struct bkey_i_reservation *reservation; bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); reservation = bkey_reservation_init(new.k); reservation->k.p = iter->pos; bch2_key_resize(&reservation->k, sectors); reservation->v.nr_replicas = opts.data_replicas; } else { struct bkey_i_extent *e; struct bch_devs_list devs_have; struct write_point *wp; devs_have.nr = 0; bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); e = bkey_extent_init(new.k); e->k.p = iter->pos; ret = bch2_alloc_sectors_start_trans(trans, opts.foreground_target, false, write_point, &devs_have, opts.data_replicas, opts.data_replicas, BCH_WATERMARK_normal, 0, &cl, &wp); if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) ret = bch_err_throw(c, transaction_restart_nested); if (ret) goto err; sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); bch2_open_bucket_get(c, wp, &open_buckets); bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); bch2_alloc_sectors_done(c, wp); extent_for_each_ptr(extent_i_to_s(e), ptr) ptr->unwritten = true; } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 0, i_sectors_delta, true); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9)); prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); } err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); bch2_bkey_buf_exit(&new, c); bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); bch2_wait_on_allocator(c, &cl); } return ret; } /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); struct bpos end_pos = POS(inum.inum, end); struct bkey_s_c k; int ret = 0, ret2 = 0; u32 snapshot; while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; if (ret) ret2 = ret; bch2_trans_begin(trans); ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) continue; bch2_btree_iter_set_snapshot(trans, iter, snapshot); /* * peek_max() doesn't have ideal semantics for extents: */ k = bch2_btree_iter_peek_max(trans, iter, end_pos); if (!k.k) break; ret = bkey_err(k); if (ret) continue; bkey_init(&delete.k); delete.k.p = iter->pos; /* create the biggest key we can */ bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, &disk_res, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); } return ret ?: ret2; } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); ret = 
bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; return ret; } /* truncate: */ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size)); } static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, u64 new_i_size, bool warn) { struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter fpunch_iter; struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents, POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); bch2_trans_iter_exit(trans, &fpunch_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_truncate(trans, op_k, NULL); } int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) { struct bkey_i_logged_op_truncate op; bkey_logged_op_truncate_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.new_i_size = cpu_to_le64(new_i_size); /* * Logged ops aren't atomic w.r.t. 
snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; } /* finsert/fcollapse: */ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset)); prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; int ret; offset <<= 9; len <<= 9; ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; if (len > 0) { if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { ret = -EFBIG; goto err; } if (offset >= inode_u.bi_size) { ret = -EINVAL; goto err; } } inode_u.bi_size += len; inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); ret = bch2_inode_write(trans, &iter, &inode_u); err: bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; u32 snapshot; bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; /* * check for missing subvolume before fpunch, as in resume we don't want * it to be a fatal error */ ret = lockrestart_do(trans, __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors)); if (ret) return ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); switch (op->v.state) { case LOGGED_OP_FINSERT_start: op->v.state = LOGGED_OP_FINSERT_shift_extents; if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; } else { bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_logged_op_update(trans, &op->k_i)); } fallthrough; case LOGGED_OP_FINSERT_shift_extents: while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); bch2_trans_begin(trans); ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, 
&snapshot, warn_errors); if (ret) goto btree_err; bch2_btree_iter_set_snapshot(trans, &iter, snapshot); bch2_btree_iter_set_pos(trans, &iter, SPOS(inum.inum, pos, snapshot)); k = insert ? bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum.inum, 0)) : bch2_btree_iter_peek_max(trans, &iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; if (!k.k || k.k->p.inode != inum.inum || bkey_le(k.k->p, POS(inum.inum, src_offset))) break; copy = bch2_bkey_make_mut_noupdate(trans, k); if ((ret = PTR_ERR_OR_ZERO(copy))) goto btree_err; if (insert && bkey_lt(bkey_start_pos(k.k), src_pos)) { bch2_cut_front(src_pos, copy); /* Splitting compressed extent? */ bch2_disk_reservation_add(c, &disk_res, copy->k.size * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)), BCH_DISK_RESERVATION_NOFAIL); } bkey_init(&delete.k); delete.k.p = copy->k.p; delete.k.p.snapshot = snapshot; delete.k.size = copy->k.size; copy->k.p.offset += shift; copy->k.p.snapshot = snapshot; op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); ret = bch2_bkey_set_needs_rebalance(c, &opts, copy) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_disk_reservation_put(c, &disk_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; pos = le64_to_cpu(op->v.pos); } op->v.state = LOGGED_OP_FINSERT_finish; if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } break; case LOGGED_OP_FINSERT_finish: break; } err: bch2_trans_iter_exit(trans, &iter); if (warn_errors) bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_finsert(trans, op_k, NULL); } int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, u64 offset, u64 len, bool insert, s64 *i_sectors_delta) { struct bkey_i_logged_op_finsert op; s64 shift = insert ? len : -len; bkey_logged_op_finsert_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.dst_offset = cpu_to_le64(offset + shift); op.v.src_offset = cpu_to_le64(offset); op.v.pos = cpu_to_le64(insert ? U64_MAX : offset); /* * Logged ops aren't atomic w.r.t. snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); struct btree_trans *trans = bch2_trans_get(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) goto out; ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; out: bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; }
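As a usage note for the entry points above, here is a caller-side sketch. The example_ function names are hypothetical; the unit conventions come from the code itself: bch2_truncate() takes the new inode size in bytes (it rounds up to the filesystem block size before punching the tail), while bch2_fpunch() takes a 512-byte-sector range on the extents btree.

#include "bcachefs.h"
#include "io_misc.h"

/* Hypothetical caller: shrink a file and collect sector accounting */
static int example_shrink_file(struct bch_fs *c, subvol_inum inum,
			       u64 new_size_bytes)
{
	s64 i_sectors_delta = 0;
	int ret;

	ret = bch2_truncate(c, inum, new_size_bytes, &i_sectors_delta);

	/*
	 * i_sectors_delta now holds the change in allocated sectors,
	 * for the caller to fold into its in-memory inode accounting.
	 */
	return ret;
}

/* Hypothetical caller: punch a byte range, converting to sectors */
static int example_punch_range(struct bch_fs *c, subvol_inum inum,
			       u64 start_bytes, u64 end_bytes)
{
	s64 i_sectors_delta = 0;

	return bch2_fpunch(c, inum, start_bytes >> 9, end_bytes >> 9,
			   &i_sectors_delta);
}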
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_IO_READ_H
#define _BCACHEFS_IO_READ_H

#include "bkey_buf.h"
#include "btree_iter.h"
#include "extents_types.h"
#include "reflink.h"

struct bch_read_bio {
	struct bch_fs		*c;
	u64			start_time;
	u64			submit_time;

	/*
	 * Reads will often have to be split, and if the extent being read from
	 * was checksummed or compressed we'll also have to allocate bounce
	 * buffers and copy the data back into the original bio.
	 *
	 * If we didn't have to split, we have to save and restore the original
	 * bi_end_io - @split below indicates which:
	 */
	union {
		struct bch_read_bio	*parent;
		bio_end_io_t		*end_io;
	};

	/*
	 * Saved copy of bio->bi_iter, from submission time - allows us to
	 * resubmit on IO error, and also to copy data back to the original bio
	 * when we're bouncing:
	 */
	struct bvec_iter	bvec_iter;

	unsigned		offset_into_extent;

	u16			flags;
	union {
		struct {
			u16	data_update:1,
				promote:1,
				bounce:1,
				split:1,
				have_ioref:1,
				narrow_crcs:1,
				saw_error:1,
				self_healing:1,
				context:2;
		};
		u16		_state;
	};
	s16			ret;

#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
	unsigned		list_idx;
#endif

	struct extent_ptr_decoded pick;

	/*
	 * pos we read from - different from data_pos for indirect extents:
	 */
	u32			subvol;
	struct bpos		read_pos;

	/*
	 * start pos of data we read (may not be pos of data we want) - for
	 * promote, narrow extents paths:
	 */
	enum btree_id		data_btree;
	struct bpos		data_pos;
	struct bversion		version;

	struct bch_io_opts	opts;

	struct work_struct	work;

	struct bio		bio;
};

#define to_rbio(_bio)	container_of((_bio), struct bch_read_bio, bio)

struct bch_devs_mask;
struct cache_promote_op;
struct extent_ptr_decoded;

static inline int bch2_read_indirect_extent(struct btree_trans *trans,
					    enum btree_id *data_btree,
					    s64 *offset_into_extent,
					    struct bkey_buf *extent)
{
	if (extent->k->k.type != KEY_TYPE_reflink_p)
		return 0;

	*data_btree = BTREE_ID_reflink;

	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k = bch2_lookup_indirect_extent(trans, &iter,
							offset_into_extent,
							bkey_i_to_s_c_reflink_p(extent->k),
							true, 0);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	if (bkey_deleted(k.k)) {
		bch2_trans_iter_exit(trans, &iter);
		return bch_err_throw(c, missing_indirect_extent);
	}

	bch2_bkey_buf_reassemble(extent, c, k);
	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

#define BCH_READ_FLAGS()		\
	x(retry_if_stale)		\
	x(may_promote)			\
	x(user_mapped)			\
	x(last_fragment)		\
	x(must_bounce)			\
	x(must_clone)			\
	x(in_retry)

enum __bch_read_flags {
#define x(n)	__BCH_READ_##n,
	BCH_READ_FLAGS()
#undef x
};

enum bch_read_flags {
#define x(n)	BCH_READ_##n = BIT(__BCH_READ_##n),
	BCH_READ_FLAGS()
#undef x
};

int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
		       struct bvec_iter, struct bpos, enum btree_id,
		       struct bkey_s_c, unsigned, struct
bch_io_failures *, unsigned, int); static inline void bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bpos read_pos, enum btree_id data_btree, struct bkey_s_c k, unsigned offset_into_extent, unsigned flags) { int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, data_btree, k, offset_into_extent, NULL, flags, -1); /* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */ WARN(ret, "unhandled error from __bch2_read_extent()"); } int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, subvol_inum, struct bch_io_failures *, struct bkey_buf *, unsigned flags); static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, subvol_inum inum) { BUG_ON(rbio->_state); rbio->subvol = inum.subvol; bch2_trans_run(c, __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, BCH_READ_retry_if_stale| BCH_READ_may_promote| BCH_READ_user_mapped)); } static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, struct bch_read_bio *orig) { struct bch_read_bio *rbio = to_rbio(bio); rbio->c = orig->c; rbio->_state = 0; rbio->flags = 0; rbio->ret = 0; rbio->split = true; rbio->parent = orig; rbio->opts = orig->opts; #ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS rbio->list_idx = 0; #endif return rbio; } static inline struct bch_read_bio *rbio_init(struct bio *bio, struct bch_fs *c, struct bch_io_opts opts, bio_end_io_t end_io) { struct bch_read_bio *rbio = to_rbio(bio); rbio->start_time = local_clock(); rbio->c = c; rbio->_state = 0; rbio->flags = 0; rbio->ret = 0; rbio->opts = opts; rbio->bio.bi_end_io = end_io; #ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS rbio->list_idx = 0; #endif return rbio; } struct promote_op; void bch2_promote_op_to_text(struct printbuf *, struct promote_op *); void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *); void bch2_fs_io_read_exit(struct bch_fs *); int bch2_fs_io_read_init(struct bch_fs *); #endif /* _BCACHEFS_IO_READ_H */
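The BCH_READ_FLAGS() definition above uses the x-macro pattern to derive both a bit-index enum and a mask enum from a single list. A self-contained sketch of the same pattern follows, with fresh EXAMPLE_ names so it does not collide with the header's enumerators:

#include <linux/bits.h>

#define EXAMPLE_FLAGS()	\
	x(foo)		\
	x(bar)

/* First expansion: sequential bit indices, __EXAMPLE_foo == 0, ... */
enum __example_flags {
#define x(n)	__EXAMPLE_##n,
	EXAMPLE_FLAGS()
#undef x
};

/* Second expansion: one-bit masks, EXAMPLE_foo == 0x1, EXAMPLE_bar == 0x2 */
enum example_flags {
#define x(n)	EXAMPLE_##n = BIT(__EXAMPLE_##n),
	EXAMPLE_FLAGS()
#undef x
};

Adding a flag then means touching only the list; both enums stay in sync automatically, which is why bch2_read() above can OR BCH_READ_retry_if_stale, BCH_READ_may_promote and BCH_READ_user_mapped together without any hand-maintained bit assignments.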
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */
#ifndef _INPUT_H
#define _INPUT_H

#include <linux/time.h>
#include <linux/list.h>
#include <uapi/linux/input.h>

/* Implementation details, userspace should not care about these */
#define ABS_MT_FIRST		ABS_MT_TOUCH_MAJOR
#define ABS_MT_LAST		ABS_MT_TOOL_Y

/*
 * In-kernel definitions.
 */

#include <linux/device.h>
#include <linux/fs.h>
#include <linux/timer.h>
#include <linux/mod_devicetable.h>

struct input_dev_poller;

/**
 * struct input_value - input value representation
 * @type: type of value (EV_KEY, EV_ABS, etc)
 * @code: the value code
 * @value: the value
 */
struct input_value {
	__u16 type;
	__u16 code;
	__s32 value;
};

enum input_clock_type {
	INPUT_CLK_REAL = 0,
	INPUT_CLK_MONO,
	INPUT_CLK_BOOT,
	INPUT_CLK_MAX
};

/**
 * struct input_dev - represents an input device
 * @name: name of the device
 * @phys: physical path to the device in the system hierarchy
 * @uniq: unique identification code for the device (if device has it)
 * @id: id of the device (struct input_id)
 * @propbit: bitmap of device properties and quirks
 * @evbit: bitmap of types of events supported by the device (EV_KEY,
 *	EV_REL, etc.)
/**
 * struct input_dev - represents an input device
 * @name: name of the device
 * @phys: physical path to the device in the system hierarchy
 * @uniq: unique identification code for the device (if device has it)
 * @id: id of the device (struct input_id)
 * @propbit: bitmap of device properties and quirks
 * @evbit: bitmap of types of events supported by the device (EV_KEY,
 *	EV_REL, etc.)
 * @keybit: bitmap of keys/buttons this device has
 * @relbit: bitmap of relative axes for the device
 * @absbit: bitmap of absolute axes for the device
 * @mscbit: bitmap of miscellaneous events supported by the device
 * @ledbit: bitmap of leds present on the device
 * @sndbit: bitmap of sound effects supported by the device
 * @ffbit: bitmap of force feedback effects supported by the device
 * @swbit: bitmap of switches present on the device
 * @hint_events_per_packet: average number of events generated by the
 *	device in a packet (between EV_SYN/SYN_REPORT events). Used by
 *	event handlers to estimate size of the buffer needed to hold
 *	events.
 * @keycodemax: size of keycode table
 * @keycodesize: size of elements in keycode table
 * @keycode: map of scancodes to keycodes for this device
 * @getkeycode: optional legacy method to retrieve current keymap.
 * @setkeycode: optional method to alter current keymap, used to implement
 *	sparse keymaps. If not supplied default mechanism will be used.
 *	The method is being called while holding event_lock and thus must
 *	not sleep
 * @ff: force feedback structure associated with the device if device
 *	supports force feedback effects
 * @poller: poller structure associated with the device if device is
 *	set up to use polling mode
 * @repeat_key: stores key code of the last key pressed; used to implement
 *	software autorepeat
 * @timer: timer for software autorepeat
 * @rep: current values for autorepeat parameters (delay, rate)
 * @mt: pointer to multitouch state
 * @absinfo: array of &struct input_absinfo elements holding information
 *	about absolute axes (current value, min, max, flat, fuzz,
 *	resolution)
 * @key: reflects current state of device's keys/buttons
 * @led: reflects current state of device's LEDs
 * @snd: reflects current state of sound effects
 * @sw: reflects current state of device's switches
 * @open: this method is called when the very first user calls
 *	input_open_device(). The driver must prepare the device
 *	to start generating events (start polling thread,
 *	request an IRQ, submit URB, etc.). The meaning of open() is
 *	to start providing events to the input core.
 * @close: this method is called when the very last user calls
 *	input_close_device(). The meaning of close() is to stop
 *	providing events to the input core.
 * @flush: purges the device. Most commonly used to get rid of force
 *	feedback effects loaded into the device when disconnecting
 *	from it
 * @event: event handler for events sent _to_ the device, like EV_LED
 *	or EV_SND. The device is expected to carry out the requested
 *	action (turn on a LED, play sound, etc.) The call is protected
 *	by @event_lock and must not sleep
 * @grab: input handle that currently has the device grabbed (via
 *	EVIOCGRAB ioctl). When a handle grabs a device it becomes sole
 *	recipient for all input events coming from the device
 * @event_lock: this spinlock is taken when input core receives
 *	and processes a new event for the device (in input_event()).
 *	Code that accesses and/or modifies parameters of a device
 *	(such as keymap or absmin, absmax, absfuzz, etc.) after device
 *	has been registered with input core must take this lock.
 * @mutex: serializes calls to open(), close() and flush() methods
 * @users: stores number of users (input handlers) that opened this
 *	device.
 *	It is used by input_open_device() and input_close_device()
 *	to make sure that dev->open() is only called when the first
 *	user opens device and dev->close() is called when the very
 *	last user closes the device
 * @going_away: marks devices that are in a middle of unregistering and
 *	causes input_open_device*() fail with -ENODEV.
 * @dev: driver model's view of this device
 * @h_list: list of input handles associated with the device. When
 *	accessing the list dev->mutex must be held
 * @node: used to place the device onto input_dev_list
 * @num_vals: number of values queued in the current frame
 * @max_vals: maximum number of values queued in a frame
 * @vals: array of values queued in the current frame
 * @devres_managed: indicates that devices is managed with devres framework
 *	and needs not be explicitly unregistered or freed.
 * @timestamp: storage for a timestamp set by input_set_timestamp called
 *	by a driver
 * @inhibited: indicates that the input device is inhibited. If that is
 *	the case then input core ignores any events generated by the device.
 *	Device's close() is called when it is being inhibited and its open()
 *	is called when it is being uninhibited.
 */
struct input_dev {
	const char *name;
	const char *phys;
	const char *uniq;
	struct input_id id;

	unsigned long propbit[BITS_TO_LONGS(INPUT_PROP_CNT)];

	unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
	unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
	unsigned long absbit[BITS_TO_LONGS(ABS_CNT)];
	unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)];
	unsigned long ledbit[BITS_TO_LONGS(LED_CNT)];
	unsigned long sndbit[BITS_TO_LONGS(SND_CNT)];
	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
	unsigned long swbit[BITS_TO_LONGS(SW_CNT)];

	unsigned int hint_events_per_packet;

	unsigned int keycodemax;
	unsigned int keycodesize;
	void *keycode;

	int (*setkeycode)(struct input_dev *dev,
			  const struct input_keymap_entry *ke,
			  unsigned int *old_keycode);
	int (*getkeycode)(struct input_dev *dev,
			  struct input_keymap_entry *ke);

	struct ff_device *ff;

	struct input_dev_poller *poller;

	unsigned int repeat_key;
	struct timer_list timer;

	int rep[REP_CNT];

	struct input_mt *mt;

	struct input_absinfo *absinfo;

	unsigned long key[BITS_TO_LONGS(KEY_CNT)];
	unsigned long led[BITS_TO_LONGS(LED_CNT)];
	unsigned long snd[BITS_TO_LONGS(SND_CNT)];
	unsigned long sw[BITS_TO_LONGS(SW_CNT)];

	int (*open)(struct input_dev *dev);
	void (*close)(struct input_dev *dev);
	int (*flush)(struct input_dev *dev, struct file *file);
	int (*event)(struct input_dev *dev, unsigned int type,
		     unsigned int code, int value);

	struct input_handle __rcu *grab;

	spinlock_t event_lock;
	struct mutex mutex;

	unsigned int users;
	bool going_away;

	struct device dev;

	struct list_head h_list;
	struct list_head node;

	unsigned int num_vals;
	unsigned int max_vals;
	struct input_value *vals;

	bool devres_managed;

	ktime_t timestamp[INPUT_CLK_MAX];

	bool inhibited;
};
#define to_input_dev(d) container_of(d, struct input_dev, dev)

/*
 * Verify that we are in sync with input_device_id mod_devicetable.h #defines
 */

#if EV_MAX != INPUT_DEVICE_ID_EV_MAX
#error "EV_MAX and INPUT_DEVICE_ID_EV_MAX do not match"
#endif

#if KEY_MIN_INTERESTING != INPUT_DEVICE_ID_KEY_MIN_INTERESTING
#error "KEY_MIN_INTERESTING and INPUT_DEVICE_ID_KEY_MIN_INTERESTING do not match"
#endif

#if KEY_MAX != INPUT_DEVICE_ID_KEY_MAX
#error "KEY_MAX and INPUT_DEVICE_ID_KEY_MAX do not match"
#endif
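/*
 * Hedged end-to-end sketch of the API documented above.  The module
 * boilerplate, the "Example Button" name and the example_* identifiers are
 * hypothetical; input_allocate_device(), input_set_capability(),
 * input_register_device(), input_free_device() and
 * input_unregister_device() are the real input-core entry points.
 */
#include <linux/module.h>

static struct input_dev *example_dev;

static int __init example_init(void)
{
	int err;

	example_dev = input_allocate_device();
	if (!example_dev)
		return -ENOMEM;

	example_dev->name = "Example Button";
	example_dev->phys = "example/input0";

	/* sets EV_KEY in evbit and BTN_0 in keybit, per the bitmaps above */
	input_set_capability(example_dev, EV_KEY, BTN_0);

	err = input_register_device(example_dev);
	if (err)
		input_free_device(example_dev);
	return err;
}

static void __exit example_exit(void)
{
	/* a registered device must not be freed directly */
	input_unregister_device(example_dev);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");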