Total coverage: 221966 (12%) of 1891823
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) 2015 Tom Herbert <tom@herbertland.com> */ #ifndef __ILA_H #define __ILA_H #include <linux/errno.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/types.h> #include <net/checksum.h> #include <net/genetlink.h> #include <net/ip.h> #include <net/protocol.h> #include <uapi/linux/ila.h> struct ila_locator { union { __u8 v8[8]; __be16 v16[4]; __be32 v32[2]; __be64 v64; }; }; struct ila_identifier { union { struct { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 __space:4; u8 csum_neutral:1; u8 type:3; #elif defined(__BIG_ENDIAN_BITFIELD) u8 type:3; u8 csum_neutral:1; u8 __space:4; #else #error "Adjust your <asm/byteorder.h> defines" #endif u8 __space2[7]; }; __u8 v8[8]; __be16 v16[4]; __be32 v32[2]; __be64 v64; }; }; #define CSUM_NEUTRAL_FLAG htonl(0x10000000) struct ila_addr { union { struct in6_addr addr; struct { struct ila_locator loc; struct ila_identifier ident; }; }; }; static inline struct ila_addr *ila_a2i(struct in6_addr *addr) { return (struct ila_addr *)addr; } struct ila_params { struct ila_locator locator; struct ila_locator locator_match; __wsum csum_diff; u8 csum_mode; u8 ident_type; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) { __be32 diff[] = { ~from[0], ~from[1], to[0], to[1], }; return csum_partial(diff, sizeof(diff), 0); } static inline bool ila_csum_neutral_set(struct ila_identifier ident) { return !!(ident.csum_neutral); } void ila_update_ipv6_locator(struct sk_buff *skb, 
struct ila_params *p, bool set_csum_neutral); void ila_init_saved_csum(struct ila_params *p); struct ila_net { struct { struct rhashtable rhash_table; spinlock_t *locks; /* Bucket locks for entry manipulation */ unsigned int locks_mask; bool hooks_registered; } xlat; }; int ila_lwt_init(void); void ila_lwt_fini(void); int ila_xlat_init_net(struct net *net); void ila_xlat_pre_exit_net(struct net *net); void ila_xlat_exit_net(struct net *net); int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info); int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info); int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info); int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info); int ila_xlat_nl_dump_start(struct netlink_callback *cb); int ila_xlat_nl_dump_done(struct netlink_callback *cb); int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb); extern unsigned int ila_net_id; extern struct genl_family ila_nl_family; #endif /* __ILA_H */
381 382 6 6 466 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 107 106 107 103 75 38 88 88 6 89 6 14 14 89 88 6 89 89 13 25 79 81 89 88 36 36 13 35 89 81 29 29 88 159 405 209 458 459 79 457 13 456 23 23 460 391 60 43 460 415 6 381 382 368 103 103 102 6 6 5 6 380 380 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 
958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

/* Initial size used by rounded_hashtable_size() when no hint is given. */
#define HASH_DEFAULT_SIZE	64UL
/* Lower bound applied to min_size in rhashtable_init_noprof(). */
#define HASH_MIN_SIZE		4U

/*
 * One slot of a nested (page-sized, multi-level) bucket table: interior
 * levels use @table to point at the next level, leaves use @bucket.
 */
union nested_table {
	union nested_table __rcu *table;
	struct rhash_lock_head __rcu *bucket;
};

/* Hash the object @he for table @tbl using the table's parameters ht->p. */
static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

/*
 * Returns lockdep's view of whether ht->mutex is held, or 1 when
 * debug_locks is off.
 */
int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

/*
 * Returns whether the bit lock (bit 0) of bucket @hash is taken.  Always
 * reports 1 when debug_locks is off or when the table is nested.
 */
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	if (!debug_locks)
		return 1;
	if (unlikely(tbl->nest))
		return 1;
	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

/* Return the top-level nested table, which is stored in tbl->buckets[0]. */
static inline union nested_table *nested_table_top(
	const struct bucket_table *tbl)
{
	/* The top-level bucket entry does not need RCU protection
	 * because it's set at the same time as tbl->nest.
	 */
	return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
}

/*
 * Free the table that slot @ntbl points to.  While @size is larger than
 * the number of pointers that fit in one page, the pointed-to table is an
 * interior level and each of its slots is freed recursively first.
 */
static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_protected(ntbl->table, 1);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}

/*
 * Free every sub-table hanging off the top-level nested table (which has
 * 1 << tbl->nest slots in use) and then the top-level table itself.
 */
static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = nested_table_top(tbl);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}

/* Free a bucket table, including its nested pages when tbl->nest is set. */
static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl);
}

/* RCU callback: free the bucket table that embeds @head. */
static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

/*
 * Return the nested table stored in *@prev, allocating and publishing a
 * zeroed page for it if the slot is still empty.  For a @leaf page every
 * slot is initialised as an empty (nulls) bucket head before publication.
 * The page is installed with cmpxchg(); if another thread installed one
 * first, ours is freed and theirs returned.  Returns NULL when the slot is
 * empty and the allocation failed.
 */
static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      bool leaf)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = alloc_hooks_tag(ht->alloc_tag,
			kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO));

	if (ntbl && leaf) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
	}

	if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
		return ntbl;
	/* Raced with another thread. */
	kfree(ntbl);
	return rcu_dereference(*prev);
}

/*
 * Allocate a bucket table in nested form: the table proper holds a single
 * bucket slot that points at the top-level nested page; lower levels are
 * allocated on demand.  Returns NULL if @nbuckets is smaller than two
 * pages worth of pointers or if an allocation fails.
 */
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = alloc_hooks_tag(ht->alloc_tag,
			kmalloc_noprof(size, gfp|__GFP_ZERO));
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				false)) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}

/*
 * Allocate and initialise a bucket table with @nbuckets buckets.  A flat
 * array is tried first; if that fails and @gfp does not allow blocking,
 * a nested table is tried instead.  Returns NULL on failure.
 */
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;
	static struct lock_class_key __key;

	tbl = alloc_hooks_tag(ht->alloc_tag,
			kvmalloc_node_align_noprof(struct_size(tbl, buckets, nbuckets),
					     1, gfp|__GFP_ZERO, NUMA_NO_NODE));

	/* Remember the requested size; nbuckets may be zeroed below. */
	size = nbuckets;

	if (tbl == NULL && !gfpflags_allow_blocking(gfp)) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		/* No flat bucket array to initialise in the nested case. */
		nbuckets = 0;
	}

	if (tbl == NULL)
		return NULL;

	lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);

	tbl->size = size;

	rcu_head_init(&tbl->rcu);
	INIT_LIST_HEAD(&tbl->walkers);

	tbl->hash_rnd = get_random_u32();

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);

	return tbl;
}

/* Follow the future_tbl chain from @tbl and return the last table in it. */
static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

/*
 * Move the last entry of old bucket @old_hash to the head of its bucket in
 * the newest table.
 *
 * Returns 0 when an entry was moved, -ENOENT when the old bucket is empty
 * and -EAGAIN when the newest table is nested.
 */
static int rhashtable_rehash_one(struct rhashtable *ht,
				 struct rhash_lock_head __rcu **bkt,
				 unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	struct rhash_head __rcu **pprev = NULL;
	unsigned int new_hash;
	unsigned long flags;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	/* Walk to the last entry; pprev tracks the link that points at it. */
	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
			  old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash],
				SINGLE_DEPTH_NESTING);

	head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags);

	/* Unlink the entry from the old chain. */
	if (pprev)
		rcu_assign_pointer(*pprev, next);
	else
		/* Need to preserve the bit lock. */
		rht_assign_locked(bkt, next);

out:
	return err;
}

/*
 * Move every entry of old bucket @old_hash to the newest table while
 * holding the old bucket's lock.  Returns 0 once the bucket is empty (or
 * does not exist), otherwise the error from rhashtable_rehash_one().
 */
static int rhashtable_rehash_chain(struct rhashtable *ht,
				    unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
	unsigned long flags;
	int err;

	if (!bkt)
		return 0;
	flags = rht_lock(old_tbl, bkt);

	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
		;

	if (err == -ENOENT)
		err = 0;
	rht_unlock(old_tbl, bkt, flags);

	return err;
}

/*
 * Attach @new_tbl as the future table of @old_tbl.  Returns -EEXIST if
 * @old_tbl already has a future table, 0 otherwise.
 */
static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 * As cmpxchg() provides strong barriers, we do not need
	 * rcu_assign_pointer().
	 */

	if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
		    new_tbl) != NULL)
		return -EEXIST;

	return 0;
}

/*
 * Migrate all buckets of the current table into its future table, publish
 * the future table as ht->tbl and queue the old one for freeing via RCU.
 *
 * Returns 0 if there was nothing to do or the rehash completed, -EAGAIN if
 * the newly published table already has a future table of its own, or the
 * error from rhashtable_rehash_chain().
 */
static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
		cond_resched();
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 * We do this inside the locked region so that
	 * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
	 * to check if it should not re-link the table.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
	spin_unlock(&ht->lock);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

/*
 * Allocate a table of @size buckets (GFP_KERNEL) and attach it as the
 * future table of @old_tbl.  Returns -ENOMEM if the allocation fails or
 * the error from rhashtable_rehash_attach(), in which case the new table
 * is freed again.  Asserts that ht->mutex is held when lock proving is on.
 */
static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., the smallest
 * size that would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}

/*
 * Work item behind ht->run_work.  Under ht->mutex it decides, based on the
 * last table in the chain, whether to grow (double), shrink, or replace a
 * nested table with one of the same size, then runs one rehash pass.  The
 * work is rescheduled whenever the result is non-zero.
 */
static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		err = rhashtable_shrink(ht);
	else if (tbl->nest)
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);

	/* -EEXIST means a future table is already attached: still rehash. */
	if (!err || err == -EEXIST) {
		int nerr;

		nerr = rhashtable_rehash_table(ht);
		err = err ?: nerr;
	}

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

/*
 * Called from the insert path (see rhashtable_try_insert()) to attach a
 * new table to @tbl using an atomic, non-warning allocation and to kick
 * the deferred worker.  The new table is twice the size of @tbl when @tbl
 * is more than 75% full, otherwise the same size.
 *
 * Returns 0 when a future table is in place (attached here or by someone
 * else), -EBUSY when @tbl is not the current table and does not need to
 * grow, or -ENOMEM when the allocation failed and nobody else attached a
 * table.
 */
static int rhashtable_insert_rehash(struct rhashtable *ht,
				    struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_access_pointer(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

/*
 * Search bucket @bkt of @tbl for an object matching @key.
 *
 * Returns the matching object for a plain table.  For an rhlist table the
 * new object @obj is instead linked in front of the matching list (taking
 * its place in the bucket chain) and NULL is returned.  When nothing
 * matches (always the case for a NULL @key), returns ERR_PTR(-EAGAIN) if
 * RHT_ELASTICITY or more entries were walked, else ERR_PTR(-ENOENT).
 */
static void *rhashtable_lookup_one(struct rhashtable *ht,
				   struct rhash_lock_head __rcu **bkt,
				   struct bucket_table *tbl, unsigned int hash,
				   const void *key, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev = NULL;
	struct rhash_head *head;
	int elasticity;

	elasticity = RHT_ELASTICITY;
	rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
		struct rhlist_head *list;
		struct rhlist_head *plist;

		elasticity--;
		/* A non-zero compare result means "no match": keep walking. */
		if (!key ||
		    (ht->p.obj_cmpfn ?
		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
			pprev = &head->next;
			continue;
		}

		if (!ht->rhlist)
			return rht_obj(ht, head);

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		if (pprev)
			rcu_assign_pointer(*pprev, obj);
		else
			/* Need to preserve the bit lock */
			rht_assign_locked(bkt, obj);

		return NULL;
	}

	if (elasticity <= 0)
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-ENOENT);
}

/*
 * Act on the result @data of rhashtable_lookup_one() for bucket @bkt.
 *
 * Returns ERR_PTR(-EEXIST) if @data is an existing object, @data itself
 * (cast) if it is NULL or an error other than -EAGAIN/-ENOENT, the future
 * table if @tbl has one, ERR_PTR(-EAGAIN) if @data was -EAGAIN,
 * ERR_PTR(-E2BIG)/ERR_PTR(-EAGAIN) if the grow-above-max/grow-above-100
 * checks trigger, and NULL after linking @obj at the head of the bucket.
 */
static struct bucket_table *rhashtable_insert_one(
	struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
	struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
	void *data)
{
	struct bucket_table *new_tbl;
	struct rhash_head *head;

	if (!IS_ERR_OR_NULL(data))
		return ERR_PTR(-EEXIST);

	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (new_tbl)
		return new_tbl;

	if (PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	if (unlikely(rht_grow_above_max(ht, tbl)))
		return ERR_PTR(-E2BIG);

	if (unlikely(rht_grow_above_100(ht, tbl)))
		return ERR_PTR(-EAGAIN);

	head = rht_ptr(bkt, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (ht->rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	/* bkt is always the head of the list, so it holds
	 * the lock, which we need to preserve
	 */
	rht_assign_locked(bkt, obj);

	return NULL;
}

/*
 * One insertion attempt: starting at ht->tbl, look up and insert in each
 * table in turn for as long as rhashtable_insert_one() hands back a
 * further table to try.  A final -EAGAIN triggers
 * rhashtable_insert_rehash(), whose error (or -EAGAIN if it succeeded) is
 * returned.  The caller in this file holds rcu_read_lock().
 */
static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	struct rhash_lock_head __rcu **bkt;
	unsigned long flags;
	unsigned int hash;
	void *data;

	new_tbl = rcu_dereference(ht->tbl);

	do {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		if (rcu_access_pointer(tbl->future_tbl))
			/* Failure is OK */
			bkt = rht_bucket_var(tbl, hash);
		else
			bkt = rht_bucket_insert(ht, tbl, hash);
		if (bkt == NULL) {
			new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
			data = ERR_PTR(-EAGAIN);
		} else {
			bool inserted;

			flags = rht_lock(tbl, bkt);
			data = rhashtable_lookup_one(ht, bkt, tbl,
						     hash, key, obj);
			new_tbl = rhashtable_insert_one(ht, bkt, tbl,
							hash, obj, data);
			inserted = data && !new_tbl;
			if (inserted)
				atomic_inc(&ht->nelems);
			/* On -EEXIST keep data: it is the existing object. */
			if (PTR_ERR(new_tbl) != -EEXIST)
				data = ERR_CAST(new_tbl);

			rht_unlock(tbl, bkt, flags);

			if (inserted && rht_grow_above_75(ht, tbl))
				schedule_work(&ht->run_work);
		}
	} while (!IS_ERR_OR_NULL(new_tbl));

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}

/*
 * Slow-path insert: repeat rhashtable_try_insert() under rcu_read_lock()
 * until it returns something other than -EAGAIN, and return that result.
 */
void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice.  Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may be called from any process context, including
 * non-preemptible context, but cannot be called from softirq or
 * hardirq context.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;
	iter->end_of_table = 0;

	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_enter.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&iter->ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

/**
 * rhashtable_walk_start_check - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk at the current iterator position.  Note that we take
 * the RCU lock in all cases including when we return an error.  So you must
 * always call rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 *
 * rhashtable_walk_start is defined as an inline variant that returns
 * void. This is preferred in cases where the caller would ignore
 * resize events and always continue.
 */
int rhashtable_walk_start_check(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;
	bool rhlist = ht->rhlist;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (iter->end_of_table)
		return 0;
	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		iter->slot = 0;
		iter->skip = 0;
		return -EAGAIN;
	}

	if (iter->p && !rhlist) {
		/*
		 * We need to validate that 'p' is still in the table, and
		 * if so, update 'skip'
		 */
		struct rhash_head *p;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			skip++;
			if (p == iter->p) {
				iter->skip = skip;
				goto found;
			}
		}
		iter->p = NULL;
	} else if (iter->p && rhlist) {
		/* Need to validate that 'list' is still in the table, and
		 * if so, update 'skip' and 'p'.
		 */
		struct rhash_head *p;
		struct rhlist_head *list;
		int skip = 0;
		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
			for (list = container_of(p, struct rhlist_head, rhead);
			     list;
			     list = rcu_dereference(list->next)) {
				skip++;
				if (list == iter->list) {
					iter->p = p;
					iter->skip = skip;
					goto found;
				}
			}
		}
		iter->p = NULL;
	}
found:
	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);

/**
 * __rhashtable_walk_find_next - Find the next element in a table (or the first
 * one in case of a new walk).
 *
 * @iter:	Hash table iterator
 *
 * Returns the found object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.
 */
static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (!tbl)
		return NULL;

	for (; iter->slot < tbl->size; iter->slot++) {
		/* Number of entries in this slot to step over first. */
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	} else {
		iter->end_of_table = true;
	}

	return NULL;
}

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		/* At the end of this slot, switch to next one and then find
		 * next entry from that point.
		 */
		iter->skip = 0;
		iter->slot++;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_peek - Return the next object but don't advance the iterator
 * @iter:	Hash table iterator
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_peek(struct rhashtable_iter *iter)
{
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;

	if (p)
		return rht_obj(ht, ht->rhlist ? &list->rhead : p);

	/* No object found in current iter, find next one in the table. */

	if (iter->skip) {
		/* A nonzero skip value points to the next entry in the table
		 * beyond that last one that was found. Decrement skip so
		 * we find the current value. __rhashtable_walk_find_next
		 * will restore the original value of skip assuming that
		 * the table hasn't changed.
		 */
		iter->skip--;
	}

	return __rhashtable_walk_find_next(iter);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_peek);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk.  Does not reset the iterator to the start of the
 * hash table.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
		/* This bucket table is being freed, don't re-link it. */
		iter->walker.tbl = NULL;
	else
		list_add(&iter->walker.list, &tbl->walkers);
	spin_unlock(&ht->lock);

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

/*
 * Initial table size: nelem_hint * 4 / 3 rounded up to a power of two when
 * a hint is given, HASH_DEFAULT_SIZE otherwise; never below min_size.
 */
static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	size_t retsize;

	if (params->nelem_hint)
		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
			      (unsigned long)params->min_size);
	else
		retsize = max(HASH_DEFAULT_SIZE,
			      (unsigned long)params->min_size);

	return retsize;
}

/* Wrapper giving jhash2() the generic hash-function signature. */
static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init_noprof(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	alloc_tag_record(ht->alloc_tag);

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	/* Cap total entries at 2^31 to avoid nelems overflow. */
	ht->max_elems = 1u << 31;

	if (params->max_size) {
		ht->p.max_size = rounddown_pow_of_two(params->max_size);
		if (ht->p.max_size < ht->max_elems / 2)
			ht->max_elems = ht->p.max_size * 2;
	}

	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);

	size = rounded_hashtable_size(&ht->p);

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		/* Keys that are a multiple of 4 bytes can use jhash2. */
		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	/*
	 * This is api initialization and thus we need to guarantee the
	 * initial rhashtable allocation. Upon failure, retry with the
	 * smallest possible size with __GFP_NOFAIL semantics.
	 */
	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (unlikely(tbl == NULL)) {
		size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
		tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
	}

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init_noprof);

/**
 * rhltable_init - initialize a new hash list table
 * @hlt:	hash list table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	err = rhashtable_init_noprof(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init_noprof);

/*
 * Call @free_fn on the object that embeds @obj.  For an rhlist table,
 * @free_fn is called for every object on the list headed by @obj; the next
 * pointer is read before each call.
 */
static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
				void (*free_fn)(void *ptr, void *arg),
				void *arg)
{
	struct rhlist_head *list;

	if (!ht->rhlist) {
		free_fn(rht_obj(ht, obj), arg);
		return;
	}

	list = container_of(obj, struct rhlist_head, rhead);
	do {
		obj = &list->rhead;
		list = rht_dereference(list->next, ht);
		free_fn(rht_obj(ht, obj), arg);
	} while (list);
}

/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of element
 * @arg:	pointer passed to free_fn
 *
 * Stops an eventual async resize. If defined, invokes free_fn for each
 * element to release its resources. Please note that RCU protected
 * readers may still be accessing the elements. Releasing of resources
 * must occur in a compatible manner. Then frees the bucket array.
 *
 * This function will eventually sleep to wait for an async resize
 * to complete. The caller is responsible for ensuring that no further
 * write operations occur in parallel.
 */
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	struct bucket_table *tbl, *next_tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
restart:
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			cond_resched();
			/* next is fetched up front: free_fn may free pos. */
			for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				rhashtable_free_one(ht, pos, free_fn, arg);
		}
	}

	/* Also tear down any future table left attached to this one. */
	next_tbl = rht_dereference(tbl->future_tbl, ht);
	bucket_table_free(tbl);
	if (next_tbl) {
		tbl = next_tbl;
		goto restart;
	}
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

/* Destroy the table without invoking any per-element callback. */
void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

/*
 * Look up the bucket slot for @hash in a nested table.  The low tbl->nest
 * bits of the hash index the top level; each further level consumes
 * another "shift" bits.  Returns NULL if a table on the path has not been
 * allocated.
 */
struct rhash_lock_head __rcu **__rht_bucket_nested(
	const struct bucket_table *tbl, unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = nested_table_top(tbl);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return NULL;

	return &ntbl[subhash].bucket;

}
EXPORT_SYMBOL_GPL(__rht_bucket_nested);

/*
 * Like __rht_bucket_nested(), but never returns NULL: when the bucket does
 * not exist, the address of a static empty (nulls) bucket head is returned
 * instead.
 */
struct rhash_lock_head __rcu **rht_bucket_nested(
	const struct bucket_table *tbl, unsigned int hash)
{
	static struct rhash_lock_head __rcu *rhnull;

	if (!rhnull)
		INIT_RHT_NULLS_HEAD(rhnull);
	return __rht_bucket_nested(tbl, hash) ?: &rhnull;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

struct rhash_lock_head __rcu **rht_bucket_nested_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;

	ntbl = nested_table_top(tbl);
	hash >>= tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift));

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		ntbl = nested_table_alloc(ht,
&ntbl[index].table, size <= (1 << shift)); } if (!ntbl) return NULL; return &ntbl[hash].bucket; } EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
1 6 5 1 1 1 2 1 1 7 1 1 1 1 1 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 // SPDX-License-Identifier: GPL-2.0-only /* * File: datagram.c * * Datagram (ISI) Phonet sockets * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/socket.h> #include <asm/ioctls.h> #include <net/sock.h> #include <linux/phonet.h> #include <linux/export.h> #include <net/phonet/phonet.h> static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); /* associated socket ceases to exist */ static void pn_sock_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int pn_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; switch (cmd) { case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: { u32 res = *karg; if (res >= 256) return -EINVAL; if (cmd == SIOCPNADDRESOURCE) return pn_sock_bind_res(sk, res); else return pn_sock_unbind_res(sk, res); } } return -ENOIOCTLCMD; } /* Destroy socket. All references are gone. 
*/ static void pn_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } static int pn_init(struct sock *sk) { sk->sk_destruct = pn_destruct; return 0; } static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; int err; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) return -EOPNOTSUPP; if (target == NULL) return -EDESTADDRREQ; if (msg->msg_namelen < sizeof(struct sockaddr_pn)) return -EINVAL; if (target->spn_family != AF_PHONET) return -EAFNOSUPPORT; skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) return err; skb_reserve(skb, MAX_PHONET_HEADER); err = memcpy_from_msg((void *)skb_put(skb, len), msg, len); if (err < 0) { kfree_skb(skb); return err; } /* * Fill in the Phonet header and * finally pass the packet forwards. */ err = pn_skb_send(sk, skb, target); /* If ok, return len. */ return (err >= 0) ? len : err; } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; int rval = -EOPNOTSUPP; int copylen; if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) goto out_nofree; skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; pn_skb_get_src_sockaddr(skb, &sa); copylen = skb->len; if (len < copylen) { msg->msg_flags |= MSG_TRUNC; copylen = len; } rval = skb_copy_datagram_msg(skb, 0, msg, copylen); if (rval) { rval = -EFAULT; goto out; } rval = (flags & MSG_TRUNC) ? skb->len : copylen; if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); *addr_len = sizeof(sa); } out: skb_free_datagram(sk, skb); out_nofree: return rval; } /* Queue an skb for a sock. 
*/ static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); return err ? NET_RX_DROP : NET_RX_SUCCESS; } /* Module registration */ static struct proto pn_proto = { .close = pn_sock_close, .ioctl = pn_ioctl, .init = pn_init, .sendmsg = pn_sendmsg, .recvmsg = pn_recvmsg, .backlog_rcv = pn_backlog_rcv, .hash = pn_sock_hash, .unhash = pn_sock_unhash, .get_port = pn_sock_get_port, .obj_size = sizeof(struct pn_sock), .owner = THIS_MODULE, .name = "PHONET", }; static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, }; int __init isi_register(void) { return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); } void __exit isi_unregister(void) { phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.
*/ struct teql_master { struct Qdisc_ops qops; struct net_device *dev; struct Qdisc *slaves; struct list_head master_list; unsigned long tx_bytes; unsigned long tx_packets; unsigned long tx_errors; unsigned long tx_dropped; }; struct teql_sched_data { struct Qdisc *next; struct teql_master *m; struct sk_buff_head q; }; #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next) #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT) /* "teql*" qdisc routines */ static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) { __skb_queue_tail(&q->q, skb); return NET_XMIT_SUCCESS; } return qdisc_drop(skb, sch, to_free); } static struct sk_buff * teql_dequeue(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); q = rcu_dereference_bh(dat_queue->qdisc); if (skb == NULL) { struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } else { qdisc_bstats_update(sch, skb); } sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } static struct sk_buff * teql_peek(struct Qdisc *sch) { /* teql is meant to be used as root qdisc */ return NULL; } static void teql_reset(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); skb_queue_purge(&dat->q); } static void teql_destroy(struct Qdisc *sch) { struct Qdisc *q, *prev; struct teql_sched_data *dat = qdisc_priv(sch); struct teql_master *master = dat->m; if (!master) return; prev = master->slaves; if (prev) { do { q = NEXT_SLAVE(prev); if (q == sch) { NEXT_SLAVE(prev) = NEXT_SLAVE(q); if (q == master->slaves) { master->slaves = NEXT_SLAVE(q); if (q == master->slaves) { struct netdev_queue *txq; spinlock_t *root_lock; txq = netdev_get_tx_queue(master->dev, 0); 
master->slaves = NULL; root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } skb_queue_purge(&dat->q); break; } } while ((prev = q) != master->slaves); } } static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(sch); struct teql_master *m = (struct teql_master *)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); if (dev->hard_header_len > m->dev->hard_header_len) return -EINVAL; if (m->dev == dev) return -ELOOP; q->m = m; skb_queue_head_init(&q->q); if (m->slaves) { if (m->dev->flags & IFF_UP) { if ((m->dev->flags & IFF_POINTOPOINT && !(dev->flags & IFF_POINTOPOINT)) || (m->dev->flags & IFF_BROADCAST && !(dev->flags & IFF_BROADCAST)) || (m->dev->flags & IFF_MULTICAST && !(dev->flags & IFF_MULTICAST)) || dev->mtu < m->dev->mtu) return -EINVAL; } else { if (!(dev->flags&IFF_POINTOPOINT)) m->dev->flags &= ~IFF_POINTOPOINT; if (!(dev->flags&IFF_BROADCAST)) m->dev->flags &= ~IFF_BROADCAST; if (!(dev->flags&IFF_MULTICAST)) m->dev->flags &= ~IFF_MULTICAST; if (dev->mtu < m->dev->mtu) m->dev->mtu = dev->mtu; } q->next = NEXT_SLAVE(m->slaves); NEXT_SLAVE(m->slaves) = sch; } else { q->next = sch; m->slaves = sch; m->dev->mtu = dev->mtu; m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK); } return 0; } static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq, struct dst_entry *dst) { struct neighbour *n; int err = 0; n = dst_neigh_lookup_skb(dst, skb); if (!n) return -ENOENT; if (dst->dev != dev) { struct neighbour *mn; mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev); neigh_release(n); if (IS_ERR(mn)) return PTR_ERR(mn); n = mn; } if (neigh_event_send(n, skb_res) == 0) { int err; char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), 
haddr, NULL, skb->len); if (err < 0) err = -EINVAL; } else { err = (skb_res == NULL) ? -EAGAIN : 1; } neigh_release(n); return err; } static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq) { struct dst_entry *dst = skb_dst(skb); int res; if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) return 0; rcu_read_lock(); res = __teql_resolve(skb, skb_res, dev, txq, dst); rcu_read_unlock(); return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct Qdisc *start, *q; int busy; int nores; int subq = skb_get_queue_mapping(skb); struct sk_buff *skb_res = NULL; start = master->slaves; restart: nores = 0; busy = 0; q = start; if (!q) goto drop; do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q) continue; if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { busy = 1; continue; } switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && netdev_start_xmit(skb, slave, slave_txq, false) == NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->tx_packets++; master->tx_bytes += length; return NETDEV_TX_OK; } __netif_tx_unlock(slave_txq); } if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0))) busy = 1; break; case 1: master->slaves = NEXT_SLAVE(q); return NETDEV_TX_OK; default: nores = 1; break; } __skb_pull(skb, skb_network_offset(skb)); } while ((q = NEXT_SLAVE(q)) != start); if (nores && skb_res == NULL) { skb_res = skb; goto restart; } if (busy) { netif_stop_queue(dev); return NETDEV_TX_BUSY; } master->tx_errors++; drop: master->tx_dropped++; 
dev_kfree_skb(skb); return NETDEV_TX_OK; } static int teql_master_open(struct net_device *dev) { struct Qdisc *q; struct teql_master *m = netdev_priv(dev); int mtu = 0xFFFE; unsigned int flags = IFF_NOARP | IFF_MULTICAST; if (m->slaves == NULL) return -EUNATCH; flags = FMASK; q = m->slaves; do { struct net_device *slave = qdisc_dev(q); if (slave == NULL) return -EUNATCH; if (slave->mtu < mtu) mtu = slave->mtu; if (slave->hard_header_len > LL_MAX_HEADER) return -EINVAL; /* If all the slaves are BROADCAST, master is BROADCAST If all the slaves are PtP, master is PtP Otherwise, master is NBMA. */ if (!(slave->flags&IFF_POINTOPOINT)) flags &= ~IFF_POINTOPOINT; if (!(slave->flags&IFF_BROADCAST)) flags &= ~IFF_BROADCAST; if (!(slave->flags&IFF_MULTICAST)) flags &= ~IFF_MULTICAST; } while ((q = NEXT_SLAVE(q)) != m->slaves); m->dev->mtu = mtu; m->dev->flags = (m->dev->flags&~FMASK) | flags; netif_start_queue(m->dev); return 0; } static int teql_master_close(struct net_device *dev) { netif_stop_queue(dev); return 0; } static void teql_master_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct teql_master *m = netdev_priv(dev); stats->tx_packets = m->tx_packets; stats->tx_bytes = m->tx_bytes; stats->tx_errors = m->tx_errors; stats->tx_dropped = m->tx_dropped; } static int teql_master_mtu(struct net_device *dev, int new_mtu) { struct teql_master *m = netdev_priv(dev); struct Qdisc *q; q = m->slaves; if (q) { do { if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; } while ((q = NEXT_SLAVE(q)) != m->slaves); } WRITE_ONCE(dev->mtu, new_mtu); return 0; } static const struct net_device_ops teql_netdev_ops = { .ndo_open = teql_master_open, .ndo_stop = teql_master_close, .ndo_start_xmit = teql_master_xmit, .ndo_get_stats64 = teql_master_stats64, .ndo_change_mtu = teql_master_mtu, }; static __init void teql_master_setup(struct net_device *dev) { struct teql_master *master = netdev_priv(dev); struct Qdisc_ops *ops = &master->qops; master->dev = dev; ops->priv_size 
= sizeof(struct teql_sched_data); ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; ops->peek = teql_peek; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; ops->owner = THIS_MODULE; dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; dev->min_mtu = 68; dev->max_mtu = 65535; dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; netif_keep_dst(dev); } static LIST_HEAD(master_dev_list); static int max_equalizers = 1; module_param(max_equalizers, int, 0); MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers"); static int __init teql_init(void) { int i; int err = -ENODEV; for (i = 0; i < max_equalizers; i++) { struct net_device *dev; struct teql_master *master; dev = alloc_netdev(sizeof(struct teql_master), "teql%d", NET_NAME_UNKNOWN, teql_master_setup); if (!dev) { err = -ENOMEM; break; } if ((err = register_netdev(dev))) { free_netdev(dev); break; } master = netdev_priv(dev); strscpy(master->qops.id, dev->name, IFNAMSIZ); err = register_qdisc(&master->qops); if (err) { unregister_netdev(dev); free_netdev(dev); break; } list_add_tail(&master->master_list, &master_dev_list); } return i ? 0 : err; } static void __exit teql_exit(void) { struct teql_master *master, *nxt; list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) { list_del(&master->master_list); unregister_qdisc(&master->qops); unregister_netdev(master->dev); free_netdev(master->dev); } } module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
15 12 4 18 19 19 14 1 2 1 1 2 3 2 4 1 1 4 2 1 5 5 5 4 1 15 9 9 1 1 16 15 1 2 1 1 2 3 3 6 4 4 4 2 1 5 5 5 4 4 1 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 // SPDX-License-Identifier: GPL-2.0 /* * fs/ioprio.c * * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk> * * Helper functions for setting/querying io priorities of processes. The * system calls closely mimmick getpriority/setpriority, see the man page for * those. The prio argument is a composite of prio class and prio data, where * the data argument has meaning within that class. The standard scheduling * classes have 8 distinct prio levels, with 0 being the highest prio and 7 * being the lowest. 
*
 * IOW, setting BE scheduling class with prio 2 is done ala:
 *
 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
 *
 * ioprio_set(PRIO_PROCESS, pid, prio);
 *
 * See also Documentation/block/ioprio.rst
 *
 */
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/ioprio.h>
#include <linux/cred.h>
#include <linux/blkdev.h>
#include <linux/capability.h>
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/pid_namespace.h>

/*
 * Validate an I/O priority value and the caller's right to request it.
 *
 * Returns 0 if acceptable, -EPERM when the RT class is requested without
 * CAP_SYS_ADMIN or CAP_SYS_NICE, and -EINVAL for an invalid class or a
 * non-zero level combined with IOPRIO_CLASS_NONE.
 */
int ioprio_check_cap(int ioprio)
{
	int class = IOPRIO_PRIO_CLASS(ioprio);
	int level = IOPRIO_PRIO_LEVEL(ioprio);

	switch (class) {
		case IOPRIO_CLASS_RT:
			/*
			 * Originally this only checked for CAP_SYS_ADMIN,
			 * which was implicitly allowed for pid 0 by security
			 * modules such as SELinux. Make sure we check
			 * CAP_SYS_ADMIN first to avoid a denial/avc for
			 * possibly missing CAP_SYS_NICE permission.
			 */
			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
				return -EPERM;
			break;
		case IOPRIO_CLASS_BE:
		case IOPRIO_CLASS_IDLE:
			break;
		case IOPRIO_CLASS_NONE:
			if (level)
				return -EINVAL;
			break;
		case IOPRIO_CLASS_INVALID:
		default:
			return -EINVAL;
	}

	return 0;
}

/*
 * ioprio_set(which, who, ioprio): apply @ioprio to a single task, to every
 * thread of a process group, or to every task owned by a user.
 *
 * Returns 0 on success, -ESRCH if no task matched, -EINVAL for an unknown
 * @which, the error from ioprio_check_cap(), or the first error returned
 * by set_task_ioprio() (iteration stops at that point).
 */
SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
{
	struct task_struct *p, *g;
	struct user_struct *user;
	struct pid *pgrp;
	kuid_t uid;
	int ret;

	ret = ioprio_check_cap(ioprio);
	if (ret)
		return ret;

	ret = -ESRCH;
	rcu_read_lock();
	switch (which) {
		case IOPRIO_WHO_PROCESS:
			/* who == 0 selects the calling task. */
			if (!who)
				p = current;
			else
				p = find_task_by_vpid(who);
			if (p)
				ret = set_task_ioprio(p, ioprio);
			break;
		case IOPRIO_WHO_PGRP:
			/* who == 0 selects the caller's process group. */
			if (!who)
				pgrp = task_pgrp(current);
			else
				pgrp = find_vpid(who);

			read_lock(&tasklist_lock);
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				ret = set_task_ioprio(p, ioprio);
				if (ret) {
					/* Drop the lock before the early exit. */
					read_unlock(&tasklist_lock);
					goto out;
				}
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			read_unlock(&tasklist_lock);

			break;
		case IOPRIO_WHO_USER:
			uid = make_kuid(current_user_ns(), who);
			if (!uid_valid(uid))
				break;
			if (!who)
				user = current_user();
			else
				user = find_user(uid);

			if (!user)
				break;

			/*
			 * NOTE(review): tasks are matched against @uid (built
			 * from @who) here, whereas ioprio_get() matches
			 * against user->uid; for who == 0 these may differ.
			 * Confirm which behaviour is intended.
			 */
			for_each_process_thread(g, p) {
				if (!uid_eq(task_uid(p), uid) ||
				    !task_pid_vnr(p))
					continue;
				ret = set_task_ioprio(p, ioprio);
				if (ret)
					goto free_uid;
			}
free_uid:
			/* Only the find_user() path is paired with free_uid(). */
			if (who)
				free_uid(user);
			break;
		default:
			ret = -EINVAL;
	}

out:
	rcu_read_unlock();
	return ret;
}

/*
 * Return __get_task_ioprio() for @p, read under task_lock(), or the
 * non-zero result of security_task_getioprio() if that check fails.
 */
static int get_task_ioprio(struct task_struct *p)
{
	int ret;

	ret = security_task_getioprio(p);
	if (ret)
		goto out;
	task_lock(p);
	ret = __get_task_ioprio(p);
	task_unlock(p);
out:
	return ret;
}

/*
 * Return raw IO priority value as set by userspace. We use this for
 * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and
 * also so that userspace can distinguish unset IO priority (which just gets
 * overridden based on task's nice value) from IO priority set to some value.
 */
static int get_task_raw_ioprio(struct task_struct *p)
{
	int ret;

	ret = security_task_getioprio(p);
	if (ret)
		goto out;
	task_lock(p);
	/* A task without an io_context reports IOPRIO_DEFAULT. */
	if (p->io_context)
		ret = p->io_context->ioprio;
	else
		ret = IOPRIO_DEFAULT;
	task_unlock(p);
out:
	return ret;
}

/* Return the numerically smaller of two ioprio values. */
static int ioprio_best(unsigned short aprio, unsigned short bprio)
{
	return min(aprio, bprio);
}

/*
 * ioprio_get(which, who): query the I/O priority of a task, or the
 * ioprio_best() value over a process group or over a user's tasks.
 *
 * Returns the priority value, -ESRCH if no task matched, or -EINVAL for
 * an unknown @which.  In the group/user cases, tasks for which
 * get_task_ioprio() returns a negative value are skipped.
 */
SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	struct pid *pgrp;
	kuid_t uid;
	int ret = -ESRCH;
	int tmpio;

	rcu_read_lock();
	switch (which) {
		case IOPRIO_WHO_PROCESS:
			if (!who)
				p = current;
			else
				p = find_task_by_vpid(who);
			if (p)
				ret = get_task_raw_ioprio(p);
			break;
		case IOPRIO_WHO_PGRP:
			if (!who)
				pgrp = task_pgrp(current);
			else
				pgrp = find_vpid(who);
			read_lock(&tasklist_lock);
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
				tmpio = get_task_ioprio(p);
				if (tmpio < 0)
					continue;
				/* First hit seeds the result; later ones fold in. */
				if (ret == -ESRCH)
					ret = tmpio;
				else
					ret = ioprio_best(ret, tmpio);
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
			read_unlock(&tasklist_lock);

			break;
		case IOPRIO_WHO_USER:
			uid = make_kuid(current_user_ns(), who);
			if (!who)
				user = current_user();
			else
				user = find_user(uid);

			if (!user)
				break;

			for_each_process_thread(g, p) {
				if (!uid_eq(task_uid(p), user->uid) ||
				    !task_pid_vnr(p))
					continue;
				tmpio = get_task_ioprio(p);
				if (tmpio < 0)
					continue;
				if (ret == -ESRCH)
					ret = tmpio;
				else
					ret = ioprio_best(ret, tmpio);
			}

			/* Only the find_user() path is paired with free_uid(). */
			if (who)
				free_uid(user);
			break;
		default:
			ret = -EINVAL;
	}

	rcu_read_unlock();
	return ret;
}
15 16 20 19 21 20 5 20 14 15 15 15 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 /* * llc_core.c - Minimum needed routines for sap handling and module init/exit * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/init.h> #include <net/net_namespace.h> #include <net/llc.h> LIST_HEAD(llc_sap_list); static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. * * Allocates and initializes sap. 
*/ static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); int i; if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; spin_lock_init(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); refcount_set(&sap->refcnt, 1); } return sap; } static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) goto out; sap = NULL; out: return sap; } /** * llc_sap_find - searches a SAP in station * @sap_value: sap to be found * * Searches for a sap in the sap list of the LLC's station upon the sap ID. * If the sap is found it will be refcounted and the user will have to do * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); if (!sap || !llc_sap_hold_safe(sap)) sap = NULL; rcu_read_unlock_bh(); return sap; } /** * llc_sap_open - open interface to the upper layers. * @lsap: SAP number. * @func: rcv func for datalink protos * * Interface function to upper layer. Each one who wants to get a SAP * (for example NetBEUI) should call this function. Returns the opened * SAP for success, NULL for failure. */ struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)) { struct llc_sap *sap = NULL; spin_lock_bh(&llc_sap_list_lock); if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; list_add_tail_rcu(&sap->node, &llc_sap_list); out: spin_unlock_bh(&llc_sap_list_lock); return sap; } /** * llc_sap_close - close interface for upper layers. * @sap: SAP to be closed. 
*
 *	Close interface function to upper layer. Each one who wants to
 *	close an open SAP (for example NetBEUI) should call this function.
 * 	Removes this sap from the list of saps in the station and then
 * 	frees the memory for this sap.
 */
void llc_sap_close(struct llc_sap *sap)
{
	/* Warns if sk_count is still non-zero at close time. */
	WARN_ON(sap->sk_count);

	spin_lock_bh(&llc_sap_list_lock);
	list_del_rcu(&sap->node);
	spin_unlock_bh(&llc_sap_list_lock);

	/* The memory is released through kfree_rcu(), not freed immediately. */
	kfree_rcu(sap, rcu);
}

/* Packet handler entry that routes ETH_P_802_2 frames to llc_rcv(). */
static struct packet_type llc_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_802_2),
	.func = llc_rcv,
};

/* Module init: register the 802.2 packet handler. */
static int __init llc_init(void)
{
	dev_add_pack(&llc_packet_type);
	return 0;
}

/* Module exit: unregister the 802.2 packet handler. */
static void __exit llc_exit(void)
{
	dev_remove_pack(&llc_packet_type);
}

module_init(llc_init);
module_exit(llc_exit);

EXPORT_SYMBOL(llc_sap_list);
EXPORT_SYMBOL(llc_sap_find);
EXPORT_SYMBOL(llc_sap_open);
EXPORT_SYMBOL(llc_sap_close);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003");
MODULE_DESCRIPTION("LLC IEEE 802.2 core support");
1 3 76 78 77 77 76 357 45 197 224 353 246 192 15 358 300 300 299 156 2 137 296 5 5 5 4 1 1 4 1 2 2 71 1 70 3 115 291 8 222 7 119 2 15 294 4 290 293 10 297 295 112 209 22 6 301 11 62 118 92 118 92 1 85 84 89 102 57 3 289 42 293 1 2 294 100 287 74 57 20 59 13 3 73 72 63 10 2 7 1 14 1 2 1 1 10 4 1 8 7 7 2 2 12 45 29 1 15 4 4 4 2 2 2 4 1 3 12 12 73 2 63 21 2 2 5 87 1 17 73 21 9 12 50 50 50 71 52 14 27 63 26 21 24 24 17 6 1 15 23 1 10 4 3 4 4 1 3 2 9 1 9 7 4 2 2 2 7 7 4 1 2 1 3 3 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 
407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 
1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 // SPDX-License-Identifier: GPL-2.0-or-later /* Keyring handling * * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/seq_file.h> #include <linux/err.h> #include <linux/user_namespace.h> #include <linux/nsproxy.h> #include <keys/keyring-type.h> #include <keys/user-type.h> #include <linux/assoc_array_priv.h> #include <linux/uaccess.h> #include <net/net_namespace.h> #include "internal.h" /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. */ #define KEYRING_SEARCH_MAX_DEPTH 6 /* * We mark pointers we pass to the associative array with bit 1 set if * they're keyrings and clear otherwise. */ #define KEYRING_PTR_SUBTYPE 0x2UL static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) { return (unsigned long)x & KEYRING_PTR_SUBTYPE; } static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) { void *object = assoc_array_ptr_to_leaf(x); return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); } static inline void *keyring_key_to_ptr(struct key *key) { if (key->type == &key_type_keyring) return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); return key; } static DEFINE_RWLOCK(keyring_name_lock); /* * Clean up the bits of user_namespace that belong to us. 
*/ void key_free_user_ns(struct user_namespace *ns) { write_lock(&keyring_name_lock); list_del_init(&ns->keyring_name_list); write_unlock(&keyring_name_lock); key_put(ns->user_keyring_register); #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif } /* * The keyring key type definition. Keyrings are simply keys of this type and * can be treated as ordinary keys in addition to having their own special * operations. */ static int keyring_preparse(struct key_preparsed_payload *prep); static void keyring_free_preparse(struct key_preparsed_payload *prep); static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); static void keyring_describe(const struct key *keyring, struct seq_file *m); static long keyring_read(const struct key *keyring, char *buffer, size_t buflen); struct key_type key_type_keyring = { .name = "keyring", .def_datalen = 0, .preparse = keyring_preparse, .free_preparse = keyring_free_preparse, .instantiate = keyring_instantiate, .revoke = keyring_revoke, .destroy = keyring_destroy, .describe = keyring_describe, .read = keyring_read, }; EXPORT_SYMBOL(key_type_keyring); /* * Semaphore to serialise link/link calls to prevent two link calls in parallel * introducing a cycle. */ static DEFINE_MUTEX(keyring_serialise_link_lock); /* * Publish the name of a keyring so that it can be found by name (if it has * one and it doesn't begin with a dot). */ static void keyring_publish_name(struct key *keyring) { struct user_namespace *ns = current_user_ns(); if (keyring->description && keyring->description[0] && keyring->description[0] != '.') { write_lock(&keyring_name_lock); list_add_tail(&keyring->name_link, &ns->keyring_name_list); write_unlock(&keyring_name_lock); } } /* * Preparse a keyring payload */ static int keyring_preparse(struct key_preparsed_payload *prep) { return prep->datalen != 0 ? 
-EINVAL : 0; } /* * Free a preparse of a user defined key payload */ static void keyring_free_preparse(struct key_preparsed_payload *prep) { } /* * Initialise a keyring. * * Returns 0 on success, -EINVAL if given any data. */ static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep) { assoc_array_init(&keyring->keys); /* make the keyring available by name if it has one */ keyring_publish_name(keyring); return 0; } /* * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd * fold the carry back too, but that requires inline asm. */ static u64 mult_64x32_and_fold(u64 x, u32 y) { u64 hi = (u64)(u32)(x >> 32) * y; u64 lo = (u64)(u32)(x) * y; return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32); } /* * Hash a key type and description. */ static void hash_key_type_and_desc(struct keyring_index_key *index_key) { const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK; const char *description = index_key->description; unsigned long hash, type; u32 piece; u64 acc; int n, desc_len = index_key->desc_len; type = (unsigned long)index_key->type; acc = mult_64x32_and_fold(type, desc_len + 13); acc = mult_64x32_and_fold(acc, 9207); piece = (unsigned long)index_key->domain_tag; acc = mult_64x32_and_fold(acc, piece); acc = mult_64x32_and_fold(acc, 9207); for (;;) { n = desc_len; if (n <= 0) break; if (n > 4) n = 4; piece = 0; memcpy(&piece, description, n); description += n; desc_len -= n; acc = mult_64x32_and_fold(acc, piece); acc = mult_64x32_and_fold(acc, 9207); } /* Fold the hash down to 32 bits if need be. */ hash = acc; if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) hash ^= acc >> 32; /* Squidge all the keyrings into a separate part of the tree to * ordinary keys by making sure the lowest level segment in the hash is * zero for keyrings and non-zero otherwise. 
*/ if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0) hash |= (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; else if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0) hash = (hash + (hash << level_shift)) & ~fan_mask; index_key->hash = hash; } /* * Finalise an index key to include a part of the description actually in the * index key, to set the domain tag and to calculate the hash. */ void key_set_index_key(struct keyring_index_key *index_key) { static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), }; size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc)); memcpy(index_key->desc, index_key->description, n); if (!index_key->domain_tag) { if (index_key->type->flags & KEY_TYPE_NET_DOMAIN) index_key->domain_tag = current->nsproxy->net_ns->key_domain; else index_key->domain_tag = &default_domain_tag; } hash_key_type_and_desc(index_key); } /** * key_put_tag - Release a ref on a tag. * @tag: The tag to release. * * This releases a reference the given tag and returns true if that ref was the * last one. */ bool key_put_tag(struct key_tag *tag) { if (refcount_dec_and_test(&tag->usage)) { kfree_rcu(tag, rcu); return true; } return false; } /** * key_remove_domain - Kill off a key domain and gc its keys * @domain_tag: The domain tag to release. * * This marks a domain tag as being dead and releases a ref on it. If that * wasn't the last reference, the garbage collector is poked to try and delete * all keys that were in the domain. */ void key_remove_domain(struct key_tag *domain_tag) { domain_tag->removed = true; if (!key_put_tag(domain_tag)) key_schedule_gc_links(); } /* * Build the next index key chunk. * * We return it one word-sized chunk at a time. 
*/ static unsigned long keyring_get_key_chunk(const void *data, int level) { const struct keyring_index_key *index_key = data; unsigned long chunk = 0; const u8 *d; int desc_len = index_key->desc_len, n = sizeof(chunk); level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; switch (level) { case 0: return index_key->hash; case 1: return index_key->x; case 2: return (unsigned long)index_key->type; case 3: return (unsigned long)index_key->domain_tag; default: level -= 4; if (desc_len <= sizeof(index_key->desc)) return 0; d = index_key->description + sizeof(index_key->desc); d += level * sizeof(long); desc_len -= sizeof(index_key->desc); if (desc_len > n) desc_len = n; do { chunk <<= 8; chunk |= *d++; } while (--desc_len > 0); return chunk; } } static unsigned long keyring_get_object_key_chunk(const void *object, int level) { const struct key *key = keyring_ptr_to_key(object); return keyring_get_key_chunk(&key->index_key, level); } static bool keyring_compare_object(const void *object, const void *data) { const struct keyring_index_key *index_key = data; const struct key *key = keyring_ptr_to_key(object); return key->index_key.type == index_key->type && key->index_key.domain_tag == index_key->domain_tag && key->index_key.desc_len == index_key->desc_len && memcmp(key->index_key.description, index_key->description, index_key->desc_len) == 0; } /* * Compare the index keys of a pair of objects and determine the bit position * at which they differ - if they differ. */ static int keyring_diff_objects(const void *object, const void *data) { const struct key *key_a = keyring_ptr_to_key(object); const struct keyring_index_key *a = &key_a->index_key; const struct keyring_index_key *b = data; unsigned long seg_a, seg_b; int level, i; level = 0; seg_a = a->hash; seg_b = b->hash; if ((seg_a ^ seg_b) != 0) goto differ; level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; /* The number of bits contributed by the hash is controlled by a * constant in the assoc_array headers. 
Everything else thereafter we * can deal with as being machine word-size dependent. */ seg_a = a->x; seg_b = b->x; if ((seg_a ^ seg_b) != 0) goto differ; level += sizeof(unsigned long); /* The next bit may not work on big endian */ seg_a = (unsigned long)a->type; seg_b = (unsigned long)b->type; if ((seg_a ^ seg_b) != 0) goto differ; level += sizeof(unsigned long); seg_a = (unsigned long)a->domain_tag; seg_b = (unsigned long)b->domain_tag; if ((seg_a ^ seg_b) != 0) goto differ; level += sizeof(unsigned long); i = sizeof(a->desc); if (a->desc_len <= i) goto same; for (; i < a->desc_len; i++) { seg_a = *(unsigned char *)(a->description + i); seg_b = *(unsigned char *)(b->description + i); if ((seg_a ^ seg_b) != 0) goto differ_plus_i; } same: return -1; differ_plus_i: level += i; differ: i = level * 8 + __ffs(seg_a ^ seg_b); return i; } /* * Free an object after stripping the keyring flag off of the pointer. */ static void keyring_free_object(void *object) { key_put(keyring_ptr_to_key(object)); } /* * Operations for keyring management by the index-tree routines. */ static const struct assoc_array_ops keyring_assoc_array_ops = { .get_key_chunk = keyring_get_key_chunk, .get_object_key_chunk = keyring_get_object_key_chunk, .compare_object = keyring_compare_object, .diff_objects = keyring_diff_objects, .free_object = keyring_free_object, }; /* * Clean up a keyring when it is destroyed. Unpublish its name if it had one * and dispose of its data. * * The garbage collector detects the final key_put(), removes the keyring from * the serial number tree and then does RCU synchronisation before coming here, * so we shouldn't need to worry about code poking around here with the RCU * readlock held by this time. 
*/ static void keyring_destroy(struct key *keyring) { if (keyring->description) { write_lock(&keyring_name_lock); if (keyring->name_link.next != NULL && !list_empty(&keyring->name_link)) list_del(&keyring->name_link); write_unlock(&keyring_name_lock); } if (keyring->restrict_link) { struct key_restriction *keyres = keyring->restrict_link; key_put(keyres->key); kfree(keyres); } assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } /* * Describe a keyring for /proc. */ static void keyring_describe(const struct key *keyring, struct seq_file *m) { if (keyring->description) seq_puts(m, keyring->description); else seq_puts(m, "[anon]"); if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else seq_puts(m, ": empty"); } } struct keyring_read_iterator_context { size_t buflen; size_t count; key_serial_t *buffer; }; static int keyring_read_iterator(const void *object, void *data) { struct keyring_read_iterator_context *ctx = data; const struct key *key = keyring_ptr_to_key(object); kenter("{%s,%d},,{%zu/%zu}", key->type->name, key->serial, ctx->count, ctx->buflen); if (ctx->count >= ctx->buflen) return 1; *ctx->buffer++ = key->serial; ctx->count += sizeof(key->serial); return 0; } /* * Read a list of key IDs from the keyring's contents in binary form * * The keyring's semaphore is read-locked by the caller. This prevents someone * from modifying it under us - which could cause us to read key IDs multiple * times. 
*/ static long keyring_read(const struct key *keyring, char *buffer, size_t buflen) { struct keyring_read_iterator_context ctx; long ret; kenter("{%d},,%zu", key_serial(keyring), buflen); if (buflen & (sizeof(key_serial_t) - 1)) return -EINVAL; /* Copy as many key IDs as fit into the buffer */ if (buffer && buflen) { ctx.buffer = (key_serial_t *)buffer; ctx.buflen = buflen; ctx.count = 0; ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); if (ret < 0) { kleave(" = %ld [iterate]", ret); return ret; } } /* Return the size of the buffer needed */ ret = keyring->keys.nr_leaves_on_tree * sizeof(key_serial_t); if (ret <= buflen) kleave("= %ld [ok]", ret); else kleave("= %ld [buffer too small]", ret); return ret; } /* * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, struct key_restriction *restrict_link, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, uid, gid, cred, perm, flags, restrict_link); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { key_put(keyring); keyring = ERR_PTR(ret); } } return keyring; } EXPORT_SYMBOL(keyring_alloc); /** * restrict_link_reject - Give -EPERM to restrict link * @keyring: The keyring being added to. * @type: The type of key being added. * @payload: The payload of the key intended to be added. * @restriction_key: Keys providing additional data for evaluating restriction. * * Reject the addition of any links to a keyring. It can be overridden by * passing KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when * adding a key to a keyring. * * This is meant to be stored in a key_restriction structure which is passed * in the restrict_link parameter to keyring_alloc(). 
*/ int restrict_link_reject(struct key *keyring, const struct key_type *type, const union key_payload *payload, struct key *restriction_key) { return -EPERM; } /* * By default, we keys found by getting an exact match on their descriptions. */ bool key_default_cmp(const struct key *key, const struct key_match_data *match_data) { return strcmp(key->description, match_data->raw_data) == 0; } /* * Iteration function to consider each key found. */ static int keyring_search_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); unsigned long kflags = READ_ONCE(key->flags); short state = READ_ONCE(key->state); kenter("{%d}", key->serial); /* ignore keys not of this type */ if (key->type != ctx->index_key.type) { kleave(" = 0 [!type]"); return 0; } /* skip invalidated, revoked and expired keys */ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { time64_t expiry = READ_ONCE(key->expiry); if (kflags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) { ctx->result = ERR_PTR(-EKEYREVOKED); kleave(" = %d [invrev]", ctx->skipped_ret); goto skipped; } if (expiry && ctx->now >= expiry) { if (!(ctx->flags & KEYRING_SEARCH_SKIP_EXPIRED)) ctx->result = ERR_PTR(-EKEYEXPIRED); kleave(" = %d [expire]", ctx->skipped_ret); goto skipped; } } /* keys that don't match */ if (!ctx->match_data.cmp(key, &ctx->match_data)) { kleave(" = 0 [!match]"); return 0; } /* key must have search permissions */ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && key_task_permission(make_key_ref(key, ctx->possessed), ctx->cred, KEY_NEED_SEARCH) < 0) { ctx->result = ERR_PTR(-EACCES); kleave(" = %d [!perm]", ctx->skipped_ret); goto skipped; } if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { /* we set a different error code if we pass a negative key */ if (state < 0) { ctx->result = ERR_PTR(state); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } } /* Found */ ctx->result = make_key_ref(key, 
ctx->possessed); kleave(" = 1 [found]"); return 1; skipped: return ctx->skipped_ret; } /* * Search inside a keyring for a key. We can search by walking to it * directly based on its index-key or we can iterate over the entire * tree looking for it, based on the match function. */ static int search_keyring(struct key *keyring, struct keyring_search_context *ctx) { if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_DIRECT) { const void *object; object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, &ctx->index_key); return object ? ctx->iterator(object, ctx) : 0; } return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); } /* * Search a tree of keyrings that point to other keyrings up to the maximum * depth. */ static bool search_nested_keyrings(struct key *keyring, struct keyring_search_context *ctx) { struct { struct key *keyring; struct assoc_array_node *node; int slot; } stack[KEYRING_SEARCH_MAX_DEPTH]; struct assoc_array_shortcut *shortcut; struct assoc_array_node *node; struct assoc_array_ptr *ptr; struct key *key; int sp = 0, slot; kenter("{%d},{%s,%s}", keyring->serial, ctx->index_key.type->name, ctx->index_key.description); #define STATE_CHECKS (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_DO_STATE_CHECK) BUG_ON((ctx->flags & STATE_CHECKS) == 0 || (ctx->flags & STATE_CHECKS) == STATE_CHECKS); if (ctx->index_key.description) key_set_index_key(&ctx->index_key); /* Check to see if this top-level keyring is what we are looking for * and whether it is valid or not. 
*/ if (ctx->match_data.lookup_type == KEYRING_SEARCH_LOOKUP_ITERATE || keyring_compare_object(keyring, &ctx->index_key)) { ctx->skipped_ret = 2; switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) { case 1: goto found; case 2: return false; default: break; } } ctx->skipped_ret = 0; /* Start processing a new keyring */ descend_to_keyring: kdebug("descend to %d", keyring->serial); if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) goto not_this_keyring; /* Search through the keys in this keyring before its searching its * subtrees. */ if (search_keyring(keyring, ctx)) goto found; /* Then manually iterate through the keyrings nested in this one. * * Start from the root node of the index tree. Because of the way the * hash function has been set up, keyrings cluster on the leftmost * branch of the root node (root slot 0) or in the root node itself. * Non-keyrings avoid the leftmost branch of the root entirely (root * slots 1-15). */ if (!(ctx->flags & KEYRING_SEARCH_RECURSE)) goto not_this_keyring; ptr = READ_ONCE(keyring->keys.root); if (!ptr) goto not_this_keyring; if (assoc_array_ptr_is_shortcut(ptr)) { /* If the root is a shortcut, either the keyring only contains * keyring pointers (everything clusters behind root slot 0) or * doesn't contain any keyring pointers. */ shortcut = assoc_array_ptr_to_shortcut(ptr); if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) goto not_this_keyring; ptr = READ_ONCE(shortcut->next_node); node = assoc_array_ptr_to_node(ptr); goto begin_node; } node = assoc_array_ptr_to_node(ptr); ptr = node->slots[0]; if (!assoc_array_ptr_is_meta(ptr)) goto begin_node; descend_to_node: /* Descend to a more distal node in this keyring's content tree and go * through that. 
*/ kdebug("descend"); if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); ptr = READ_ONCE(shortcut->next_node); BUG_ON(!assoc_array_ptr_is_node(ptr)); } node = assoc_array_ptr_to_node(ptr); begin_node: kdebug("begin_node"); slot = 0; ascend_to_node: /* Go through the slots in a node */ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); if (assoc_array_ptr_is_meta(ptr)) { if (node->back_pointer || assoc_array_ptr_is_shortcut(ptr)) goto descend_to_node; } if (!keyring_ptr_is_keyring(ptr)) continue; key = keyring_ptr_to_key(ptr); if (sp >= KEYRING_SEARCH_MAX_DEPTH) { if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) { ctx->result = ERR_PTR(-ELOOP); return false; } goto not_this_keyring; } /* Search a nested keyring */ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && key_task_permission(make_key_ref(key, ctx->possessed), ctx->cred, KEY_NEED_SEARCH) < 0) continue; /* stack the current position */ stack[sp].keyring = keyring; stack[sp].node = node; stack[sp].slot = slot; sp++; /* begin again with the new keyring */ keyring = key; goto descend_to_keyring; } /* We've dealt with all the slots in the current node, so now we need * to ascend to the parent and continue processing there. */ ptr = READ_ONCE(node->back_pointer); slot = node->parent_slot; if (ptr && assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); ptr = READ_ONCE(shortcut->back_pointer); slot = shortcut->parent_slot; } if (!ptr) goto not_this_keyring; node = assoc_array_ptr_to_node(ptr); slot++; /* If we've ascended to the root (zero backpointer), we must have just * finished processing the leftmost branch rather than the root slots - * so there can't be any more keyrings for us to find. */ if (node->back_pointer) { kdebug("ascend %d", slot); goto ascend_to_node; } /* The keyring we're looking at was disqualified or didn't contain a * matching key. 
*/ not_this_keyring: kdebug("not_this_keyring %d", sp); if (sp <= 0) { kleave(" = false"); return false; } /* Resume the processing of a keyring higher up in the tree */ sp--; keyring = stack[sp].keyring; node = stack[sp].node; slot = stack[sp].slot + 1; kdebug("ascend to %d [%d]", keyring->serial, slot); goto ascend_to_node; /* We found a viable match */ found: key = key_ref_to_ptr(ctx->result); key_check(key); if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { key->last_used_at = ctx->now; keyring->last_used_at = ctx->now; while (sp > 0) stack[--sp].keyring->last_used_at = ctx->now; } kleave(" = true"); return true; } /** * keyring_search_rcu - Search a keyring tree for a matching key under RCU * @keyring_ref: A pointer to the keyring with possession indicator. * @ctx: The keyring search context. * * Search the supplied keyring tree for a key that matches the criteria given. * The root keyring and any linked keyrings must grant Search permission to the * caller to be searchable and keys can only be found if they too grant Search * to the caller. The possession flag on the root keyring pointer controls use * of the possessor bits in permissions checking of the entire tree. In * addition, the LSM gets to forbid keyring searches and key matches. * * The search is performed as a breadth-then-depth search up to the prescribed * limit (KEYRING_SEARCH_MAX_DEPTH). The caller must hold the RCU read lock to * prevent keyrings from being destroyed or rearranged whilst they are being * searched. * * Keys are matched to the type provided and are then filtered by the match * function, which is given the description to use in any way it sees fit. The * match function may use any attributes of a key that it wishes to * determine the match. Normally the match function from the key type would be * used. * * RCU can be used to prevent the keyring key lists from disappearing without * the need to take lots of locks. 
* * Returns a pointer to the found key and increments the key usage count if * successful; -EAGAIN if no matching keys were found, or if expired or revoked * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the * specified keyring wasn't a keyring. * * In the case of a successful return, the possession attribute from * @keyring_ref is propagated to the returned key reference. */ key_ref_t keyring_search_rcu(key_ref_t keyring_ref, struct keyring_search_context *ctx) { struct key *keyring; long err; ctx->iterator = keyring_search_iterator; ctx->possessed = is_key_possessed(keyring_ref); ctx->result = ERR_PTR(-EAGAIN); keyring = key_ref_to_ptr(keyring_ref); key_check(keyring); if (keyring->type != &key_type_keyring) return ERR_PTR(-ENOTDIR); if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { err = key_task_permission(keyring_ref, ctx->cred, KEY_NEED_SEARCH); if (err < 0) return ERR_PTR(err); } ctx->now = ktime_get_real_seconds(); if (search_nested_keyrings(keyring, ctx)) __key_get(key_ref_to_ptr(ctx->result)); return ctx->result; } /** * keyring_search - Search the supplied keyring tree for a matching key * @keyring: The root of the keyring tree to be searched. * @type: The type of keyring we want to find. * @description: The name of the keyring we want to find. * @recurse: True to search the children of @keyring also * * As keyring_search_rcu() above, but using the current task's credentials and * type's default matching function and preferred search method. 
*/ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, const char *description, bool recurse) { struct keyring_search_context ctx = { .index_key.type = type, .index_key.description = description, .index_key.desc_len = strlen(description), .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = KEYRING_SEARCH_DO_STATE_CHECK, }; key_ref_t key; int ret; if (recurse) ctx.flags |= KEYRING_SEARCH_RECURSE; if (type->match_preparse) { ret = type->match_preparse(&ctx.match_data); if (ret < 0) return ERR_PTR(ret); } rcu_read_lock(); key = keyring_search_rcu(keyring, &ctx); rcu_read_unlock(); if (type->match_free) type->match_free(&ctx.match_data); return key; } EXPORT_SYMBOL(keyring_search); static struct key_restriction *keyring_restriction_alloc( key_restrict_link_func_t check) { struct key_restriction *keyres = kzalloc(sizeof(struct key_restriction), GFP_KERNEL); if (!keyres) return ERR_PTR(-ENOMEM); keyres->check = check; return keyres; } /* * Semaphore to serialise restriction setup to prevent reference count * cycles through restriction key pointers. */ static DECLARE_RWSEM(keyring_serialise_restrict_sem); /* * Check for restriction cycles that would prevent keyring garbage collection. * keyring_serialise_restrict_sem must be held. */ static bool keyring_detect_restriction_cycle(const struct key *dest_keyring, struct key_restriction *keyres) { while (keyres && keyres->key && keyres->key->type == &key_type_keyring) { if (keyres->key == dest_keyring) return true; keyres = keyres->key->restrict_link; } return false; } /** * keyring_restrict - Look up and apply a restriction to a keyring * @keyring_ref: The keyring to be restricted * @type: The key type that will provide the restriction checker. * @restriction: The restriction options to apply to the keyring * * Look up a keyring and apply a restriction to it. 
The restriction is managed * by the specific key type, but can be configured by the options specified in * the restriction string. */ int keyring_restrict(key_ref_t keyring_ref, const char *type, const char *restriction) { struct key *keyring; struct key_type *restrict_type = NULL; struct key_restriction *restrict_link; int ret = 0; keyring = key_ref_to_ptr(keyring_ref); key_check(keyring); if (keyring->type != &key_type_keyring) return -ENOTDIR; if (!type) { restrict_link = keyring_restriction_alloc(restrict_link_reject); } else { restrict_type = key_type_lookup(type); if (IS_ERR(restrict_type)) return PTR_ERR(restrict_type); if (!restrict_type->lookup_restriction) { ret = -ENOENT; goto error; } restrict_link = restrict_type->lookup_restriction(restriction); } if (IS_ERR(restrict_link)) { ret = PTR_ERR(restrict_link); goto error; } down_write(&keyring->sem); down_write(&keyring_serialise_restrict_sem); if (keyring->restrict_link) { ret = -EEXIST; } else if (keyring_detect_restriction_cycle(keyring, restrict_link)) { ret = -EDEADLK; } else { keyring->restrict_link = restrict_link; notify_key(keyring, NOTIFY_KEY_SETATTR, 0); } up_write(&keyring_serialise_restrict_sem); up_write(&keyring->sem); if (ret < 0) { key_put(restrict_link->key); kfree(restrict_link); } error: if (restrict_type) key_type_put(restrict_type); return ret; } EXPORT_SYMBOL(keyring_restrict); /* * Search the given keyring for a key that might be updated. * * The caller must guarantee that the keyring is a keyring and that the * permission is granted to modify the keyring as no check is made here. The * caller must also hold a lock on the keyring semaphore. * * Returns a pointer to the found key with usage count incremented if * successful and returns NULL if not found. Revoked and invalidated keys are * skipped over. * * If successful, the possession indicator is propagated from the keyring ref * to the returned key reference. 
*/ key_ref_t find_key_to_update(key_ref_t keyring_ref, const struct keyring_index_key *index_key) { struct key *keyring, *key; const void *object; keyring = key_ref_to_ptr(keyring_ref); kenter("{%d},{%s,%s}", keyring->serial, index_key->type->name, index_key->description); object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, index_key); if (object) goto found; kleave(" = NULL"); return NULL; found: key = keyring_ptr_to_key(object); if (key->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) { kleave(" = NULL [x]"); return NULL; } __key_get(key); kleave(" = {%d}", key->serial); return make_key_ref(key, is_key_possessed(keyring_ref)); } /* * Find a keyring with the specified name. * * Only keyrings that have nonzero refcount, are not revoked, and are owned by a * user in the current user namespace are considered. If @uid_keyring is %true, * the keyring additionally must have been allocated as a user or user session * keyring; otherwise, it must grant Search permission directly to the caller. * * Returns a pointer to the keyring with the keyring's refcount having being * incremented on success. -ENOKEY is returned if a key could not be found. 
*/ struct key *find_keyring_by_name(const char *name, bool uid_keyring) { struct user_namespace *ns = current_user_ns(); struct key *keyring; if (!name) return ERR_PTR(-EINVAL); read_lock(&keyring_name_lock); /* Search this hash bucket for a keyring with a matching name that * grants Search permission and that hasn't been revoked */ list_for_each_entry(keyring, &ns->keyring_name_list, name_link) { if (!kuid_has_mapping(ns, keyring->user->uid)) continue; if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) continue; if (strcmp(keyring->description, name) != 0) continue; if (uid_keyring) { if (!test_bit(KEY_FLAG_UID_KEYRING, &keyring->flags)) continue; } else { if (key_permission(make_key_ref(keyring, 0), KEY_NEED_SEARCH) < 0) continue; } /* we've got a match but we might end up racing with * key_cleanup() if the keyring is currently 'dead' * (ie. it has a zero usage count) */ if (!refcount_inc_not_zero(&keyring->usage)) continue; keyring->last_used_at = ktime_get_real_seconds(); goto out; } keyring = ERR_PTR(-ENOKEY); out: read_unlock(&keyring_name_lock); return keyring; } static int keyring_detect_cycle_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); kenter("{%d}", key->serial); /* We might get a keyring with matching index-key that is nonetheless a * different keyring. */ if (key != ctx->match_data.raw_data) return 0; ctx->result = ERR_PTR(-EDEADLK); return 1; } /* * See if a cycle will be created by inserting acyclic tree B in acyclic * tree A at the topmost level (ie: as a direct child of A). * * Since we are adding B to A at the top level, checking for cycles should just * be a matter of seeing if node A is somewhere in tree B. 
*/ static int keyring_detect_cycle(struct key *A, struct key *B) { struct keyring_search_context ctx = { .index_key = A->index_key, .match_data.raw_data = A, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .iterator = keyring_detect_cycle_iterator, .flags = (KEYRING_SEARCH_NO_STATE_CHECK | KEYRING_SEARCH_NO_UPDATE_TIME | KEYRING_SEARCH_NO_CHECK_PERM | KEYRING_SEARCH_DETECT_TOO_DEEP | KEYRING_SEARCH_RECURSE), }; rcu_read_lock(); search_nested_keyrings(B, &ctx); rcu_read_unlock(); return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); } /* * Lock keyring for link. */ int __key_link_lock(struct key *keyring, const struct keyring_index_key *index_key) __acquires(&keyring->sem) __acquires(&keyring_serialise_link_lock) { if (keyring->type != &key_type_keyring) return -ENOTDIR; down_write(&keyring->sem); /* Serialise link/link calls to prevent parallel calls causing a cycle * when linking two keyring in opposite orders. */ if (index_key->type == &key_type_keyring) mutex_lock(&keyring_serialise_link_lock); return 0; } /* * Lock keyrings for move (link/unlink combination). */ int __key_move_lock(struct key *l_keyring, struct key *u_keyring, const struct keyring_index_key *index_key) __acquires(&l_keyring->sem) __acquires(&u_keyring->sem) __acquires(&keyring_serialise_link_lock) { if (l_keyring->type != &key_type_keyring || u_keyring->type != &key_type_keyring) return -ENOTDIR; /* We have to be very careful here to take the keyring locks in the * right order, lest we open ourselves to deadlocking against another * move operation. */ if (l_keyring < u_keyring) { down_write(&l_keyring->sem); down_write_nested(&u_keyring->sem, 1); } else { down_write(&u_keyring->sem); down_write_nested(&l_keyring->sem, 1); } /* Serialise link/link calls to prevent parallel calls causing a cycle * when linking two keyring in opposite orders. 
*/ if (index_key->type == &key_type_keyring) mutex_lock(&keyring_serialise_link_lock); return 0; } /* * Preallocate memory so that a key can be linked into to a keyring. */ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit **_edit) { struct assoc_array_edit *edit; int ret; kenter("%d,%s,%s,", keyring->serial, index_key->type->name, index_key->description); BUG_ON(index_key->desc_len == 0); BUG_ON(*_edit != NULL); *_edit = NULL; ret = -EKEYREVOKED; if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) goto error; /* Create an edit script that will insert/replace the key in the * keyring tree. */ edit = assoc_array_insert(&keyring->keys, &keyring_assoc_array_ops, index_key, NULL); if (IS_ERR(edit)) { ret = PTR_ERR(edit); goto error; } /* If we're not replacing a link in-place then we're going to need some * extra quota. */ if (!edit->dead_leaf) { ret = key_payload_reserve(keyring, keyring->datalen + KEYQUOTA_LINK_BYTES); if (ret < 0) goto error_cancel; } *_edit = edit; kleave(" = 0"); return 0; error_cancel: assoc_array_cancel_edit(edit); error: kleave(" = %d", ret); return ret; } /* * Check already instantiated keys aren't going to be a problem. * * The caller must have called __key_link_begin(). Don't need to call this for * keys that were created since __key_link_begin() was called. */ int __key_link_check_live_key(struct key *keyring, struct key *key) { if (key->type == &key_type_keyring) /* check that we aren't going to create a cycle by linking one * keyring to another */ return keyring_detect_cycle(keyring, key); return 0; } /* * Link a key into to a keyring. * * Must be called with __key_link_begin() having being called. Discards any * already extant link to matching key if there is one, so that each keyring * holds at most one link to any given key of a particular type+description * combination. 
*/ void __key_link(struct key *keyring, struct key *key, struct assoc_array_edit **_edit) { __key_get(key); assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); assoc_array_apply_edit(*_edit); *_edit = NULL; notify_key(keyring, NOTIFY_KEY_LINKED, key_serial(key)); } /* * Finish linking a key into to a keyring. * * Must be called with __key_link_begin() having being called. */ void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit *edit) __releases(&keyring->sem) __releases(&keyring_serialise_link_lock) { BUG_ON(index_key->type == NULL); kenter("%d,%s,", keyring->serial, index_key->type->name); if (edit) { if (!edit->dead_leaf) { key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); } assoc_array_cancel_edit(edit); } up_write(&keyring->sem); if (index_key->type == &key_type_keyring) mutex_unlock(&keyring_serialise_link_lock); } /* * Check addition of keys to restricted keyrings. */ static int __key_link_check_restriction(struct key *keyring, struct key *key) { if (!keyring->restrict_link || !keyring->restrict_link->check) return 0; return keyring->restrict_link->check(keyring, key->type, &key->payload, keyring->restrict_link->key); } /** * key_link - Link a key to a keyring * @keyring: The keyring to make the link in. * @key: The key to link to. * * Make a link in a keyring to a key, such that the keyring holds a reference * on that key and the key can potentially be found by searching that keyring. * * This function will write-lock the keyring's semaphore and will consume some * of the user's key data quota to hold the link. * * Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring, * -EKEYREVOKED if the keyring has been revoked, -ENFILE if the keyring is * full, -EDQUOT if there is insufficient key data quota remaining to add * another link or -ENOMEM if there's insufficient memory. 
* * It is assumed that the caller has checked that it is permitted for a link to * be made (the keyring should have Write permission and the key Link * permission). */ int key_link(struct key *keyring, struct key *key) { struct assoc_array_edit *edit = NULL; int ret; kenter("{%d,%d}", keyring->serial, refcount_read(&keyring->usage)); key_check(keyring); key_check(key); ret = __key_link_lock(keyring, &key->index_key); if (ret < 0) goto error; ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error_end; kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage)); ret = __key_link_check_restriction(keyring, key); if (ret == 0) ret = __key_link_check_live_key(keyring, key); if (ret == 0) __key_link(keyring, key, &edit); error_end: __key_link_end(keyring, &key->index_key, edit); error: kleave(" = %d {%d,%d}", ret, keyring->serial, refcount_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); /* * Lock a keyring for unlink. */ static int __key_unlink_lock(struct key *keyring) __acquires(&keyring->sem) { if (keyring->type != &key_type_keyring) return -ENOTDIR; down_write(&keyring->sem); return 0; } /* * Begin the process of unlinking a key from a keyring. */ static int __key_unlink_begin(struct key *keyring, struct key *key, struct assoc_array_edit **_edit) { struct assoc_array_edit *edit; BUG_ON(*_edit != NULL); edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, &key->index_key); if (IS_ERR(edit)) return PTR_ERR(edit); if (!edit) return -ENOENT; *_edit = edit; return 0; } /* * Apply an unlink change. */ static void __key_unlink(struct key *keyring, struct key *key, struct assoc_array_edit **_edit) { assoc_array_apply_edit(*_edit); notify_key(keyring, NOTIFY_KEY_UNLINKED, key_serial(key)); *_edit = NULL; key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); } /* * Finish unlinking a key from to a keyring. 
*/ static void __key_unlink_end(struct key *keyring, struct key *key, struct assoc_array_edit *edit) __releases(&keyring->sem) { if (edit) assoc_array_cancel_edit(edit); up_write(&keyring->sem); } /** * key_unlink - Unlink the first link to a key from a keyring. * @keyring: The keyring to remove the link from. * @key: The key the link is to. * * Remove a link from a keyring to a key. * * This function will write-lock the keyring's semaphore. * * Returns 0 if successful, -ENOTDIR if the keyring isn't a keyring, -ENOENT if * the key isn't linked to by the keyring or -ENOMEM if there's insufficient * memory. * * It is assumed that the caller has checked that it is permitted for a link to * be removed (the keyring should have Write permission; no permissions are * required on the key). */ int key_unlink(struct key *keyring, struct key *key) { struct assoc_array_edit *edit = NULL; int ret; key_check(keyring); key_check(key); ret = __key_unlink_lock(keyring); if (ret < 0) return ret; ret = __key_unlink_begin(keyring, key, &edit); if (ret == 0) __key_unlink(keyring, key, &edit); __key_unlink_end(keyring, key, edit); return ret; } EXPORT_SYMBOL(key_unlink); /** * key_move - Move a key from one keyring to another * @key: The key to move * @from_keyring: The keyring to remove the link from. * @to_keyring: The keyring to make the link in. * @flags: Qualifying flags, such as KEYCTL_MOVE_EXCL. * * Make a link in @to_keyring to a key, such that the keyring holds a reference * on that key and the key can potentially be found by searching that keyring * whilst simultaneously removing a link to the key from @from_keyring. * * This function will write-lock both keyring's semaphores and will consume * some of the user's key data quota to hold the link on @to_keyring. 
* * Returns 0 if successful, -ENOTDIR if either keyring isn't a keyring, * -EKEYREVOKED if either keyring has been revoked, -ENFILE if the second * keyring is full, -EDQUOT if there is insufficient key data quota remaining * to add another link or -ENOMEM if there's insufficient memory. If * KEYCTL_MOVE_EXCL is set, then -EEXIST will be returned if there's already a * matching key in @to_keyring. * * It is assumed that the caller has checked that it is permitted for a link to * be made (the keyring should have Write permission and the key Link * permission). */ int key_move(struct key *key, struct key *from_keyring, struct key *to_keyring, unsigned int flags) { struct assoc_array_edit *from_edit = NULL, *to_edit = NULL; int ret; kenter("%d,%d,%d", key->serial, from_keyring->serial, to_keyring->serial); if (from_keyring == to_keyring) return 0; key_check(key); key_check(from_keyring); key_check(to_keyring); ret = __key_move_lock(from_keyring, to_keyring, &key->index_key); if (ret < 0) goto out; ret = __key_unlink_begin(from_keyring, key, &from_edit); if (ret < 0) goto error; ret = __key_link_begin(to_keyring, &key->index_key, &to_edit); if (ret < 0) goto error; ret = -EEXIST; if (to_edit->dead_leaf && (flags & KEYCTL_MOVE_EXCL)) goto error; ret = __key_link_check_restriction(to_keyring, key); if (ret < 0) goto error; ret = __key_link_check_live_key(to_keyring, key); if (ret < 0) goto error; __key_unlink(from_keyring, key, &from_edit); __key_link(to_keyring, key, &to_edit); error: __key_link_end(to_keyring, &key->index_key, to_edit); __key_unlink_end(from_keyring, key, from_edit); out: kleave(" = %d", ret); return ret; } EXPORT_SYMBOL(key_move); /** * keyring_clear - Clear a keyring * @keyring: The keyring to clear. * * Clear the contents of the specified keyring. * * Returns 0 if successful or -ENOTDIR if the keyring isn't a keyring. 
*/ int keyring_clear(struct key *keyring) { struct assoc_array_edit *edit; int ret; if (keyring->type != &key_type_keyring) return -ENOTDIR; down_write(&keyring->sem); edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); if (IS_ERR(edit)) { ret = PTR_ERR(edit); } else { if (edit) assoc_array_apply_edit(edit); notify_key(keyring, NOTIFY_KEY_CLEARED, 0); key_payload_reserve(keyring, 0); ret = 0; } up_write(&keyring->sem); return ret; } EXPORT_SYMBOL(keyring_clear); /* * Dispose of the links from a revoked keyring. * * This is called with the key sem write-locked. */ static void keyring_revoke(struct key *keyring) { struct assoc_array_edit *edit; edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); if (!IS_ERR(edit)) { if (edit) assoc_array_apply_edit(edit); key_payload_reserve(keyring, 0); } } static bool keyring_gc_select_iterator(void *object, void *iterator_data) { struct key *key = keyring_ptr_to_key(object); time64_t *limit = iterator_data; if (key_is_dead(key, *limit)) return false; key_get(key); return true; } static int keyring_gc_check_iterator(const void *object, void *iterator_data) { const struct key *key = keyring_ptr_to_key(object); time64_t *limit = iterator_data; key_check(key); return key_is_dead(key, *limit); } /* * Garbage collect pointers from a keyring. * * Not called with any locks held. The keyring's key struct will not be * deallocated under us as only our caller may deallocate it. 
*/ void keyring_gc(struct key *keyring, time64_t limit) { int result; kenter("%x{%s}", keyring->serial, keyring->description ?: ""); if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) goto dont_gc; /* scan the keyring looking for dead keys */ rcu_read_lock(); result = assoc_array_iterate(&keyring->keys, keyring_gc_check_iterator, &limit); rcu_read_unlock(); if (result == true) goto do_gc; dont_gc: kleave(" [no gc]"); return; do_gc: down_write(&keyring->sem); assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, keyring_gc_select_iterator, &limit); up_write(&keyring->sem); kleave(" [gc]"); } /* * Garbage collect restriction pointers from a keyring. * * Keyring restrictions are associated with a key type, and must be cleaned * up if the key type is unregistered. The restriction is altered to always * reject additional keys so a keyring cannot be opened up by unregistering * a key type. * * Not called with any keyring locks held. The keyring's key struct will not * be deallocated under us as only our caller may deallocate it. * * The caller is required to hold key_types_sem and dead_type->sem. This is * fulfilled by key_gc_keytype() holding the locks on behalf of * key_garbage_collector(), which it invokes on a workqueue. */ void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type) { struct key_restriction *keyres; kenter("%x{%s}", keyring->serial, keyring->description ?: ""); /* * keyring->restrict_link is only assigned at key allocation time * or with the key type locked, so the only values that could be * concurrently assigned to keyring->restrict_link are for key * types other than dead_type. Given this, it's ok to check * the key type before acquiring keyring->sem. 
*/ if (!dead_type || !keyring->restrict_link || keyring->restrict_link->keytype != dead_type) { kleave(" [no restriction gc]"); return; } /* Lock the keyring to ensure that a link is not in progress */ down_write(&keyring->sem); keyres = keyring->restrict_link; keyres->check = restrict_link_reject; key_put(keyres->key); keyres->key = NULL; keyres->keytype = NULL; up_write(&keyring->sem); kleave(" [restriction gc]"); }
4 1 1 4 3 3 1 5 5 4 4 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 // SPDX-License-Identifier: GPL-2.0 /* * fs/sysfs/symlink.c - operations for initializing and mounting sysfs * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo <teheo@suse.de> * * Please see Documentation/filesystems/sysfs.rst for more information. */ #include <linux/fs.h> #include <linux/magic.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/user_namespace.h> #include <linux/fs_context.h> #include <net/net_namespace.h> #include "sysfs.h" static struct kernfs_root *sysfs_root; struct kernfs_node *sysfs_root_kn; static int sysfs_get_tree(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; int ret; ret = kernfs_get_tree(fc); if (ret) return ret; if (kfc->new_sb_created) fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE; return 0; } static void sysfs_fs_context_free(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; if (kfc->ns_tag) kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag); kernfs_free_fs_context(fc); kfree(kfc); } static const struct fs_context_operations sysfs_fs_context_ops = { .free = sysfs_fs_context_free, .get_tree = sysfs_get_tree, }; static int sysfs_init_fs_context(struct fs_context *fc) { struct kernfs_fs_context *kfc; struct net *netns; if (!(fc->sb_flags & SB_KERNMOUNT)) { if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return -EPERM; } kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL); if (!kfc) return -ENOMEM; kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); kfc->root = sysfs_root; kfc->magic = SYSFS_MAGIC; 
fc->fs_private = kfc; fc->ops = &sysfs_fs_context_ops; if (netns) { put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(netns->user_ns); } fc->global = true; return 0; } static void sysfs_kill_sb(struct super_block *sb) { void *ns = (void *)kernfs_super_ns(sb); kernfs_kill_sb(sb); kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); } static struct file_system_type sysfs_fs_type = { .name = "sysfs", .init_fs_context = sysfs_init_fs_context, .kill_sb = sysfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; int __init sysfs_init(void) { int err; sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); sysfs_root_kn = kernfs_root_to_node(sysfs_root); err = register_filesystem(&sysfs_fs_type); if (err) { kernfs_destroy_root(sysfs_root); return err; } return 0; }
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com> * Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> * Copyright 2011-2012, cozybit Inc. * Copyright (C) 2021,2023 Intel Corporation */ #include "ieee80211_i.h" #include "mesh.h" #include "driver-ops.h" /* This is not in the standard. It represents a tolerable tsf drift below * which we do no TSF adjustment. */ #define TOFFSET_MINIMUM_ADJUSTMENT 10 /* This is not in the standard. It is a margin added to the * Toffset setpoint to mitigate TSF overcorrection * introduced by TSF adjustment latency. */ #define TOFFSET_SET_MARGIN 20 /* This is not in the standard. It represents the maximum Toffset jump above * which we'll invalidate the Toffset setpoint and choose a new setpoint. This * could be, for instance, in case a neighbor is restarted and its TSF counter * reset. 
*/ #define TOFFSET_MAXIMUM_ADJUSTMENT 800 /* 0.8 ms */ struct sync_method { u8 method; struct ieee80211_mesh_sync_ops ops; }; /** * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT * * @cfg: mesh config element from the mesh peer (or %NULL) * * Returns: If the mesh peer is currently adjusting its TBTT */ static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) { return cfg && (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING); } void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* sdata->vif.bss_conf.beacon_int in 1024us units, 0.04% */ u64 beacon_int_fraction = sdata->vif.bss_conf.beacon_int * 1024 / 2500; u64 tsf; u64 tsfdelta; spin_lock_bh(&ifmsh->sync_offset_lock); if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) { msync_dbg(sdata, "TSF : max clockdrift=%lld; adjusting\n", (long long) ifmsh->sync_offset_clockdrift_max); tsfdelta = -ifmsh->sync_offset_clockdrift_max; ifmsh->sync_offset_clockdrift_max = 0; } else { msync_dbg(sdata, "TSF : max clockdrift=%lld; adjusting by %llu\n", (long long) ifmsh->sync_offset_clockdrift_max, (unsigned long long) beacon_int_fraction); tsfdelta = -beacon_int_fraction; ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction; } spin_unlock_bh(&ifmsh->sync_offset_lock); if (local->ops->offset_tsf) { drv_offset_tsf(local, sdata, tsfdelta); } else { tsf = drv_get_tsf(local, sdata); if (tsf != -1ULL) drv_set_tsf(local, sdata, tsf + tsfdelta); } } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, unsigned int len, const struct ieee80211_meshconf_ie *mesh_cfg, struct ieee80211_rx_status *rx_status) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; struct sta_info *sta; u64 t_t, t_r; WARN_ON(ifmsh->mesh_sp_id != 
IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); /* standard mentions only beacons */ if (stype != IEEE80211_STYPE_BEACON) return; /* * Get time when timestamp field was received. If we don't * have rx timestamps, then use current tsf as an approximation. * drv_get_tsf() must be called before entering the rcu-read * section. */ if (ieee80211_have_rx_timestamp(rx_status)) t_r = ieee80211_calculate_rx_timestamp(local, rx_status, len + FCS_LEN, 24); else t_r = drv_get_tsf(local, sdata); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); if (!sta) goto no_sync; /* check offset sync conditions (13.13.2.2.1) * * TODO also sync to * dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors */ if (mesh_peer_tbtt_adjusting(mesh_cfg)) { msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; } /* Timing offset calculation (see 13.13.2.2.2) */ t_t = le64_to_cpu(mgmt->u.beacon.timestamp); sta->mesh->t_offset = t_t - t_r; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { s64 t_clockdrift = sta->mesh->t_offset_setpoint - sta->mesh->t_offset; msync_dbg(sdata, "STA %pM : t_offset=%lld, t_offset_setpoint=%lld, t_clockdrift=%lld\n", sta->sta.addr, (long long) sta->mesh->t_offset, (long long) sta->mesh->t_offset_setpoint, (long long) t_clockdrift); if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { msync_dbg(sdata, "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, (long long) t_clockdrift); clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); goto no_sync; } spin_lock_bh(&ifmsh->sync_offset_lock); if (t_clockdrift > ifmsh->sync_offset_clockdrift_max) ifmsh->sync_offset_clockdrift_max = t_clockdrift; spin_unlock_bh(&ifmsh->sync_offset_lock); } else { sta->mesh->t_offset_setpoint = sta->mesh->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); msync_dbg(sdata, "STA %pM : offset was invalid, t_offset=%lld\n", sta->sta.addr, (long long) sta->mesh->t_offset); } no_sync: rcu_read_unlock(); } 
static void mesh_sync_offset_adjust_tsf(struct ieee80211_sub_if_data *sdata, struct beacon_data *beacon) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); WARN_ON(!rcu_read_lock_held()); spin_lock_bh(&ifmsh->sync_offset_lock); if (ifmsh->sync_offset_clockdrift_max > TOFFSET_MINIMUM_ADJUSTMENT) { /* Since adjusting the tsf here would * require a possibly blocking call * to the driver tsf setter, we punt * the tsf adjustment to the mesh tasklet */ msync_dbg(sdata, "TSF : kicking off TSF adjustment with clockdrift_max=%lld\n", ifmsh->sync_offset_clockdrift_max); set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); } else { msync_dbg(sdata, "TSF : max clockdrift=%lld; too small to adjust\n", (long long)ifmsh->sync_offset_clockdrift_max); ifmsh->sync_offset_clockdrift_max = 0; } spin_unlock_bh(&ifmsh->sync_offset_lock); } static const struct sync_method sync_methods[] = { { .method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .ops = { .rx_bcn_presp = &mesh_sync_offset_rx_bcn_presp, .adjust_tsf = &mesh_sync_offset_adjust_tsf, } }, }; const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method) { int i; for (i = 0 ; i < ARRAY_SIZE(sync_methods); ++i) { if (sync_methods[i].method == method) return &sync_methods[i].ops; } return NULL; }
1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 
1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 /* * Atheros CARL9170 driver * * mac80211 interaction code * * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2009, 2010, Christian Lamparter <chunkeey@googlemail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, see * http://www.gnu.org/licenses/. * * This file incorporates work covered by the following copyright and * permission notice: * Copyright (c) 2007-2008 Atheros Communications, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include <linux/slab.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/random.h> #include <net/mac80211.h> #include <net/cfg80211.h> #include "hw.h" #include "carl9170.h" #include "cmd.h" static bool modparam_nohwcrypt; module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444); MODULE_PARM_DESC(nohwcrypt, "Disable hardware crypto offload."); int modparam_noht; module_param_named(noht, modparam_noht, int, 0444); MODULE_PARM_DESC(noht, "Disable MPDU aggregation."); #define RATE(_bitrate, _hw_rate, _txpidx, _flags) { \ .bitrate = (_bitrate), \ .flags = (_flags), \ .hw_value = (_hw_rate) | (_txpidx) << 4, \ } struct ieee80211_rate __carl9170_ratetable[] = { RATE(10, 0, 0, 0), RATE(20, 1, 1, IEEE80211_RATE_SHORT_PREAMBLE), RATE(55, 2, 2, IEEE80211_RATE_SHORT_PREAMBLE), RATE(110, 3, 3, IEEE80211_RATE_SHORT_PREAMBLE), RATE(60, 0xb, 0, 0), RATE(90, 0xf, 0, 0), RATE(120, 0xa, 0, 0), RATE(180, 0xe, 0, 0), RATE(240, 0x9, 0, 0), RATE(360, 0xd, 1, 0), RATE(480, 0x8, 2, 0), RATE(540, 0xc, 3, 0), }; #undef RATE #define carl9170_g_ratetable (__carl9170_ratetable + 0) #define carl9170_g_ratetable_size 12 #define carl9170_a_ratetable (__carl9170_ratetable + 4) #define carl9170_a_ratetable_size 8 /* * NB: The hw_value is used as an index into the carl9170_phy_freq_params * array in phy.c so that we don't have to do frequency lookups! 
 */
#define CHAN(_freq, _idx) {		\
	.center_freq	= (_freq),	\
	.hw_value	= (_idx),	\
	.max_power	= 18, /* XXX */	\
}

/* 2.4 GHz channel list; hw_value runs from 0 to 13. */
static struct ieee80211_channel carl9170_2ghz_chantable[] = {
	CHAN(2412,  0),
	CHAN(2417,  1),
	CHAN(2422,  2),
	CHAN(2427,  3),
	CHAN(2432,  4),
	CHAN(2437,  5),
	CHAN(2442,  6),
	CHAN(2447,  7),
	CHAN(2452,  8),
	CHAN(2457,  9),
	CHAN(2462, 10),
	CHAN(2467, 11),
	CHAN(2472, 12),
	CHAN(2484, 13),
};

/*
 * 4.9/5 GHz channel list; hw_value continues from 14 and runs to 48.
 * The last four entries (5170-5230 MHz, hw_value 45-48) come after
 * 5825 MHz, so this table is ordered by hw_value, not by frequency.
 */
static struct ieee80211_channel carl9170_5ghz_chantable[] = {
	CHAN(4920, 14),
	CHAN(4940, 15),
	CHAN(4960, 16),
	CHAN(4980, 17),
	CHAN(5040, 18),
	CHAN(5060, 19),
	CHAN(5080, 20),
	CHAN(5180, 21),
	CHAN(5200, 22),
	CHAN(5220, 23),
	CHAN(5240, 24),
	CHAN(5260, 25),
	CHAN(5280, 26),
	CHAN(5300, 27),
	CHAN(5320, 28),
	CHAN(5500, 29),
	CHAN(5520, 30),
	CHAN(5540, 31),
	CHAN(5560, 32),
	CHAN(5580, 33),
	CHAN(5600, 34),
	CHAN(5620, 35),
	CHAN(5640, 36),
	CHAN(5660, 37),
	CHAN(5680, 38),
	CHAN(5700, 39),
	CHAN(5745, 40),
	CHAN(5765, 41),
	CHAN(5785, 42),
	CHAN(5805, 43),
	CHAN(5825, 44),
	CHAN(5170, 45),
	CHAN(5190, 46),
	CHAN(5210, 47),
	CHAN(5230, 48),
};
#undef CHAN

/* HT capability initializer used by both band definitions below. */
#define CARL9170_HT_CAP							\
{									\
	.ht_supported	= true,						\
	.cap		= IEEE80211_HT_CAP_MAX_AMSDU |			\
			  IEEE80211_HT_CAP_SUP_WIDTH_20_40 |		\
			  IEEE80211_HT_CAP_SGI_40 |			\
			  IEEE80211_HT_CAP_DSSSCCK40 |			\
			  IEEE80211_HT_CAP_SM_PS,			\
	.ampdu_factor	= IEEE80211_HT_MAX_AMPDU_64K,			\
	.ampdu_density	= IEEE80211_HT_MPDU_DENSITY_8,			\
	.mcs		= {						\
		.rx_mask = { 0xff, 0xff, 0, 0, 0x1, 0, 0, 0, 0, 0, },	\
		.rx_highest = cpu_to_le16(300),				\
		.tx_params = IEEE80211_HT_MCS_TX_DEFINED,		\
	},								\
}

/* 2.4 GHz band: the 2 GHz channel table with the carl9170_g_ratetable rates. */
static struct ieee80211_supported_band carl9170_band_2GHz  = {
	.channels	= carl9170_2ghz_chantable,
	.n_channels	= ARRAY_SIZE(carl9170_2ghz_chantable),
	.bitrates	= carl9170_g_ratetable,
	.n_bitrates	= carl9170_g_ratetable_size,
	.ht_cap		= CARL9170_HT_CAP,
};

/* 5 GHz band: the 5 GHz channel table with the carl9170_a_ratetable rates. */
static struct ieee80211_supported_band carl9170_band_5GHz = {
	.channels	= carl9170_5ghz_chantable,
	.n_channels	= ARRAY_SIZE(carl9170_5ghz_chantable),
	.bitrates	= carl9170_a_ratetable,
	.n_bitrates	= carl9170_a_ratetable_size,
	.ht_cap		= CARL9170_HT_CAP,
};
static void carl9170_ampdu_gc(struct ar9170 *ar) { struct carl9170_sta_tid *tid_info; LIST_HEAD(tid_gc); rcu_read_lock(); list_for_each_entry_rcu(tid_info, &ar->tx_ampdu_list, list) { spin_lock_bh(&ar->tx_ampdu_list_lock); if (tid_info->state == CARL9170_TID_STATE_SHUTDOWN) { tid_info->state = CARL9170_TID_STATE_KILLED; list_del_rcu(&tid_info->list); ar->tx_ampdu_list_len--; list_add_tail(&tid_info->tmp_list, &tid_gc); } spin_unlock_bh(&ar->tx_ampdu_list_lock); } rcu_assign_pointer(ar->tx_ampdu_iter, tid_info); rcu_read_unlock(); synchronize_rcu(); while (!list_empty(&tid_gc)) { struct sk_buff *skb; tid_info = list_first_entry(&tid_gc, struct carl9170_sta_tid, tmp_list); while ((skb = __skb_dequeue(&tid_info->queue))) carl9170_tx_status(ar, skb, false); list_del_init(&tid_info->tmp_list); kfree(tid_info); } } static void carl9170_flush(struct ar9170 *ar, bool drop_queued) { if (drop_queued) { int i; /* * We can only drop frames which have not been uploaded * to the device yet. */ for (i = 0; i < ar->hw->queues; i++) { struct sk_buff *skb; while ((skb = skb_dequeue(&ar->tx_pending[i]))) { struct ieee80211_tx_info *info; info = IEEE80211_SKB_CB(skb); if (info->flags & IEEE80211_TX_CTL_AMPDU) atomic_dec(&ar->tx_ampdu_upload); carl9170_tx_status(ar, skb, false); } } } /* Wait for all other outstanding frames to timeout. 
*/ if (atomic_read(&ar->tx_total_queued)) WARN_ON(wait_for_completion_timeout(&ar->tx_flush, HZ) == 0); } static void carl9170_flush_ba(struct ar9170 *ar) { struct sk_buff_head free; struct carl9170_sta_tid *tid_info; struct sk_buff *skb; __skb_queue_head_init(&free); rcu_read_lock(); spin_lock_bh(&ar->tx_ampdu_list_lock); list_for_each_entry_rcu(tid_info, &ar->tx_ampdu_list, list) { if (tid_info->state > CARL9170_TID_STATE_SUSPEND) { tid_info->state = CARL9170_TID_STATE_SUSPEND; spin_lock(&tid_info->lock); while ((skb = __skb_dequeue(&tid_info->queue))) __skb_queue_tail(&free, skb); spin_unlock(&tid_info->lock); } } spin_unlock_bh(&ar->tx_ampdu_list_lock); rcu_read_unlock(); while ((skb = __skb_dequeue(&free))) carl9170_tx_status(ar, skb, false); } static void carl9170_zap_queues(struct ar9170 *ar) { struct carl9170_vif_info *cvif; unsigned int i; carl9170_ampdu_gc(ar); carl9170_flush_ba(ar); carl9170_flush(ar, true); for (i = 0; i < ar->hw->queues; i++) { spin_lock_bh(&ar->tx_status[i].lock); while (!skb_queue_empty(&ar->tx_status[i])) { struct sk_buff *skb; skb = skb_peek(&ar->tx_status[i]); carl9170_tx_get_skb(skb); spin_unlock_bh(&ar->tx_status[i].lock); carl9170_tx_drop(ar, skb); spin_lock_bh(&ar->tx_status[i].lock); carl9170_tx_put_skb(skb); } spin_unlock_bh(&ar->tx_status[i].lock); } BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_SOFT < 1); BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_HARD < CARL9170_NUM_TX_LIMIT_SOFT); BUILD_BUG_ON(CARL9170_NUM_TX_LIMIT_HARD >= CARL9170_BAW_BITS); /* reinitialize queues statistics */ memset(&ar->tx_stats, 0, sizeof(ar->tx_stats)); for (i = 0; i < ar->hw->queues; i++) ar->tx_stats[i].limit = CARL9170_NUM_TX_LIMIT_HARD; bitmap_zero(ar->mem_bitmap, ar->fw.mem_blocks); rcu_read_lock(); list_for_each_entry_rcu(cvif, &ar->vif_list, list) { spin_lock_bh(&ar->beacon_lock); dev_kfree_skb_any(cvif->beacon); cvif->beacon = NULL; spin_unlock_bh(&ar->beacon_lock); } rcu_read_unlock(); atomic_set(&ar->tx_ampdu_upload, 0); atomic_set(&ar->tx_ampdu_scheduler, 
0); atomic_set(&ar->tx_total_pending, 0); atomic_set(&ar->tx_total_queued, 0); atomic_set(&ar->mem_free_blocks, ar->fw.mem_blocks); } #define CARL9170_FILL_QUEUE(queue, ai_fs, cwmin, cwmax, _txop) \ do { \ queue.aifs = ai_fs; \ queue.cw_min = cwmin; \ queue.cw_max = cwmax; \ queue.txop = _txop; \ } while (0) static int carl9170_op_start(struct ieee80211_hw *hw) { struct ar9170 *ar = hw->priv; int err, i; mutex_lock(&ar->mutex); carl9170_zap_queues(ar); /* reset QoS defaults */ CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_VO], 2, 3, 7, 47); CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_VI], 2, 7, 15, 94); CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_BE], 3, 15, 1023, 0); CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_BK], 7, 15, 1023, 0); CARL9170_FILL_QUEUE(ar->edcf[AR9170_TXQ_SPECIAL], 2, 3, 7, 0); ar->current_factor = ar->current_density = -1; /* "The first key is unique." */ ar->usedkeys = 1; ar->filter_state = 0; ar->ps.last_action = jiffies; ar->ps.last_slept = jiffies; ar->erp_mode = CARL9170_ERP_AUTO; /* Set "disable hw crypto offload" whenever the module parameter * nohwcrypt is true or if the firmware does not support it. 
*/ ar->disable_offload = modparam_nohwcrypt | ar->fw.disable_offload_fw; ar->rx_software_decryption = ar->disable_offload; for (i = 0; i < ar->hw->queues; i++) { ar->queue_stop_timeout[i] = jiffies; ar->max_queue_stop_timeout[i] = 0; } atomic_set(&ar->mem_allocs, 0); err = carl9170_usb_open(ar); if (err) goto out; err = carl9170_init_mac(ar); if (err) goto out; err = carl9170_set_qos(ar); if (err) goto out; if (ar->fw.rx_filter) { err = carl9170_rx_filter(ar, CARL9170_RX_FILTER_OTHER_RA | CARL9170_RX_FILTER_CTL_OTHER | CARL9170_RX_FILTER_BAD); if (err) goto out; } err = carl9170_write_reg(ar, AR9170_MAC_REG_DMA_TRIGGER, AR9170_DMA_TRIGGER_RXQ); if (err) goto out; /* Clear key-cache */ for (i = 0; i < AR9170_CAM_MAX_USER + 4; i++) { err = carl9170_upload_key(ar, i, NULL, AR9170_ENC_ALG_NONE, 0, NULL, 0); if (err) goto out; err = carl9170_upload_key(ar, i, NULL, AR9170_ENC_ALG_NONE, 1, NULL, 0); if (err) goto out; if (i < AR9170_CAM_MAX_USER) { err = carl9170_disable_key(ar, i); if (err) goto out; } } carl9170_set_state_when(ar, CARL9170_IDLE, CARL9170_STARTED); ieee80211_queue_delayed_work(ar->hw, &ar->stat_work, round_jiffies(msecs_to_jiffies(CARL9170_STAT_WORK))); ieee80211_wake_queues(ar->hw); err = 0; out: mutex_unlock(&ar->mutex); return err; } static void carl9170_cancel_worker(struct ar9170 *ar) { cancel_delayed_work_sync(&ar->stat_work); cancel_delayed_work_sync(&ar->tx_janitor); #ifdef CONFIG_CARL9170_LEDS cancel_delayed_work_sync(&ar->led_work); #endif /* CONFIG_CARL9170_LEDS */ cancel_work_sync(&ar->ps_work); cancel_work_sync(&ar->ping_work); cancel_work_sync(&ar->ampdu_work); } static void carl9170_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ar9170 *ar = hw->priv; carl9170_set_state_when(ar, CARL9170_STARTED, CARL9170_IDLE); ieee80211_stop_queues(ar->hw); mutex_lock(&ar->mutex); if (IS_ACCEPTING_CMD(ar)) { RCU_INIT_POINTER(ar->beacon_iter, NULL); carl9170_led_set_state(ar, 0); /* stop DMA */ carl9170_write_reg(ar, AR9170_MAC_REG_DMA_TRIGGER, 
0); carl9170_usb_stop(ar); } carl9170_zap_queues(ar); mutex_unlock(&ar->mutex); carl9170_cancel_worker(ar); } static void carl9170_restart_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, restart_work); int err = -EIO; ar->usedkeys = 0; ar->filter_state = 0; carl9170_cancel_worker(ar); mutex_lock(&ar->mutex); if (!ar->force_usb_reset) { err = carl9170_usb_restart(ar); if (net_ratelimit()) { if (err) dev_err(&ar->udev->dev, "Failed to restart device (%d).\n", err); else dev_info(&ar->udev->dev, "device restarted successfully.\n"); } } carl9170_zap_queues(ar); mutex_unlock(&ar->mutex); if (!err && !ar->force_usb_reset) { ar->restart_counter++; atomic_set(&ar->pending_restarts, 0); ieee80211_restart_hw(ar->hw); } else { /* * The reset was unsuccessful and the device seems to * be dead. But there's still one option: a low-level * usb subsystem reset... */ carl9170_usb_reset(ar); } } void carl9170_restart(struct ar9170 *ar, const enum carl9170_restart_reasons r) { carl9170_set_state_when(ar, CARL9170_STARTED, CARL9170_IDLE); /* * Sometimes, an error can trigger several different reset events. * By ignoring these *surplus* reset events, the device won't be * killed again, right after it has recovered. */ if (atomic_inc_return(&ar->pending_restarts) > 1) { dev_dbg(&ar->udev->dev, "ignoring restart (%d)\n", r); return; } ieee80211_stop_queues(ar->hw); dev_err(&ar->udev->dev, "restart device (%d)\n", r); if (!WARN_ON(r == CARL9170_RR_NO_REASON) || !WARN_ON(r >= __CARL9170_RR_LAST)) ar->last_reason = r; if (!ar->registered) return; if (!IS_ACCEPTING_CMD(ar) || ar->needs_full_reset) ar->force_usb_reset = true; ieee80211_queue_work(ar->hw, &ar->restart_work); /* * At this point, the device instance might have vanished/disabled. * So, don't put any code which access the ar9170 struct * without proper protection. 
*/ } static void carl9170_ping_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, ping_work); int err; if (!IS_STARTED(ar)) return; mutex_lock(&ar->mutex); err = carl9170_echo_test(ar, 0xdeadbeef); if (err) carl9170_restart(ar, CARL9170_RR_UNRESPONSIVE_DEVICE); mutex_unlock(&ar->mutex); } static int carl9170_init_interface(struct ar9170 *ar, struct ieee80211_vif *vif) { struct ath_common *common = &ar->common; int err; if (!vif) { WARN_ON_ONCE(IS_STARTED(ar)); return 0; } memcpy(common->macaddr, vif->addr, ETH_ALEN); /* We have to fall back to software crypto, whenever * the user choose to participates in an IBSS. HW * offload for IBSS RSN is not supported by this driver. * * NOTE: If the previous main interface has already * disabled hw crypto offload, we have to keep this * previous disable_offload setting as it was. * Altough ideally, we should notify mac80211 and tell * it to forget about any HW crypto offload for now. */ ar->disable_offload |= ((vif->type != NL80211_IFTYPE_STATION) && (vif->type != NL80211_IFTYPE_AP)); /* The driver used to have P2P GO+CLIENT support, * but since this was dropped and we don't know if * there are any gremlins lurking in the shadows, * so best we keep HW offload disabled for P2P. */ ar->disable_offload |= vif->p2p; ar->rx_software_decryption = ar->disable_offload; err = carl9170_set_operating_mode(ar); return err; } static int carl9170_op_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct carl9170_vif_info *vif_priv = (void *) vif->drv_priv; struct ieee80211_vif *main_vif, *old_main = NULL; struct ar9170 *ar = hw->priv; int vif_id = -1, err = 0; mutex_lock(&ar->mutex); rcu_read_lock(); if (vif_priv->active) { /* * Skip the interface structure initialization, * if the vif survived the _restart call. 
*/ vif_id = vif_priv->id; vif_priv->enable_beacon = false; spin_lock_bh(&ar->beacon_lock); dev_kfree_skb_any(vif_priv->beacon); vif_priv->beacon = NULL; spin_unlock_bh(&ar->beacon_lock); goto init; } /* Because the AR9170 HW's MAC doesn't provide full support for * multiple, independent interfaces [of different operation modes]. * We have to select ONE main interface [main mode of HW], but we * can have multiple slaves [AKA: entry in the ACK-table]. * * The first (from HEAD/TOP) interface in the ar->vif_list is * always the main intf. All following intfs in this list * are considered to be slave intfs. */ main_vif = carl9170_get_main_vif(ar); if (main_vif) { switch (main_vif->type) { case NL80211_IFTYPE_STATION: if (vif->type == NL80211_IFTYPE_STATION) break; err = -EBUSY; rcu_read_unlock(); goto unlock; case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_AP: if ((vif->type == NL80211_IFTYPE_STATION) || (vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_MESH_POINT)) break; err = -EBUSY; rcu_read_unlock(); goto unlock; default: rcu_read_unlock(); goto unlock; } } vif_id = bitmap_find_free_region(&ar->vif_bitmap, ar->fw.vif_num, 0); if (vif_id < 0) { rcu_read_unlock(); err = -ENOSPC; goto unlock; } BUG_ON(ar->vif_priv[vif_id].id != vif_id); vif_priv->active = true; vif_priv->id = vif_id; vif_priv->enable_beacon = false; ar->vifs++; if (old_main) { /* We end up in here, if the main interface is being replaced. * Put the new main interface at the HEAD of the list and the * previous inteface will automatically become second in line. */ list_add_rcu(&vif_priv->list, &ar->vif_list); } else { /* Add new inteface. If the list is empty, it will become the * main inteface, otherwise it will be slave. 
*/ list_add_tail_rcu(&vif_priv->list, &ar->vif_list); } rcu_assign_pointer(ar->vif_priv[vif_id].vif, vif); init: main_vif = carl9170_get_main_vif(ar); if (main_vif == vif) { rcu_assign_pointer(ar->beacon_iter, vif_priv); rcu_read_unlock(); if (old_main) { struct carl9170_vif_info *old_main_priv = (void *) old_main->drv_priv; /* downgrade old main intf to slave intf. * NOTE: We are no longer under rcu_read_lock. * But we are still holding ar->mutex, so the * vif data [id, addr] is safe. */ err = carl9170_mod_virtual_mac(ar, old_main_priv->id, old_main->addr); if (err) goto unlock; } err = carl9170_init_interface(ar, vif); if (err) goto unlock; } else { rcu_read_unlock(); err = carl9170_mod_virtual_mac(ar, vif_id, vif->addr); if (err) goto unlock; } if (ar->fw.tx_seq_table) { err = carl9170_write_reg(ar, ar->fw.tx_seq_table + vif_id * 4, 0); if (err) goto unlock; } unlock: if (err && (vif_id >= 0)) { vif_priv->active = false; bitmap_release_region(&ar->vif_bitmap, vif_id, 0); ar->vifs--; RCU_INIT_POINTER(ar->vif_priv[vif_id].vif, NULL); list_del_rcu(&vif_priv->list); mutex_unlock(&ar->mutex); synchronize_rcu(); } else { if (ar->vifs > 1) ar->ps.off_override |= PS_OFF_VIF; mutex_unlock(&ar->mutex); } return err; } static void carl9170_op_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct carl9170_vif_info *vif_priv = (void *) vif->drv_priv; struct ieee80211_vif *main_vif; struct ar9170 *ar = hw->priv; unsigned int id; mutex_lock(&ar->mutex); if (WARN_ON_ONCE(!vif_priv->active)) goto unlock; ar->vifs--; rcu_read_lock(); main_vif = carl9170_get_main_vif(ar); id = vif_priv->id; vif_priv->active = false; WARN_ON(vif_priv->enable_beacon); vif_priv->enable_beacon = false; list_del_rcu(&vif_priv->list); RCU_INIT_POINTER(ar->vif_priv[id].vif, NULL); if (vif == main_vif) { rcu_read_unlock(); if (ar->vifs) { WARN_ON(carl9170_init_interface(ar, carl9170_get_main_vif(ar))); } else { carl9170_set_operating_mode(ar); } } else { rcu_read_unlock(); 
WARN_ON(carl9170_mod_virtual_mac(ar, id, NULL)); } carl9170_update_beacon(ar, false); carl9170_flush_cab(ar, id); spin_lock_bh(&ar->beacon_lock); dev_kfree_skb_any(vif_priv->beacon); vif_priv->beacon = NULL; spin_unlock_bh(&ar->beacon_lock); bitmap_release_region(&ar->vif_bitmap, id, 0); carl9170_set_beacon_timers(ar); if (ar->vifs == 1) ar->ps.off_override &= ~PS_OFF_VIF; unlock: mutex_unlock(&ar->mutex); synchronize_rcu(); } void carl9170_ps_check(struct ar9170 *ar) { ieee80211_queue_work(ar->hw, &ar->ps_work); } /* caller must hold ar->mutex */ static int carl9170_ps_update(struct ar9170 *ar) { bool ps = false; int err = 0; if (!ar->ps.off_override) ps = (ar->hw->conf.flags & IEEE80211_CONF_PS); if (ps != ar->ps.state) { err = carl9170_powersave(ar, ps); if (err) return err; if (ar->ps.state && !ps) { ar->ps.sleep_ms = jiffies_to_msecs(jiffies - ar->ps.last_action); } if (ps) ar->ps.last_slept = jiffies; ar->ps.last_action = jiffies; ar->ps.state = ps; } return 0; } static void carl9170_ps_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, ps_work); mutex_lock(&ar->mutex); if (IS_STARTED(ar)) WARN_ON_ONCE(carl9170_ps_update(ar) != 0); mutex_unlock(&ar->mutex); } static int carl9170_update_survey(struct ar9170 *ar, bool flush, bool noise) { int err; if (noise) { err = carl9170_get_noisefloor(ar); if (err) return err; } if (ar->fw.hw_counters) { err = carl9170_collect_tally(ar); if (err) return err; } if (flush) memset(&ar->tally, 0, sizeof(ar->tally)); return 0; } static void carl9170_stat_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, stat_work.work); int err; mutex_lock(&ar->mutex); err = carl9170_update_survey(ar, false, true); mutex_unlock(&ar->mutex); if (err) return; ieee80211_queue_delayed_work(ar->hw, &ar->stat_work, round_jiffies(msecs_to_jiffies(CARL9170_STAT_WORK))); } static int carl9170_op_config(struct ieee80211_hw *hw, int radio_idx, u32 changed) { struct ar9170 *ar = 
hw->priv; int err = 0; mutex_lock(&ar->mutex); if (changed & IEEE80211_CONF_CHANGE_LISTEN_INTERVAL) { /* TODO */ err = 0; } if (changed & IEEE80211_CONF_CHANGE_PS) { err = carl9170_ps_update(ar); if (err) goto out; } if (changed & IEEE80211_CONF_CHANGE_SMPS) { /* TODO */ err = 0; } if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { enum nl80211_channel_type channel_type = cfg80211_get_chandef_type(&hw->conf.chandef); /* adjust slot time for 5 GHz */ err = carl9170_set_slot_time(ar); if (err) goto out; err = carl9170_update_survey(ar, true, false); if (err) goto out; err = carl9170_set_channel(ar, hw->conf.chandef.chan, channel_type); if (err) goto out; err = carl9170_update_survey(ar, false, true); if (err) goto out; err = carl9170_set_dyn_sifs_ack(ar); if (err) goto out; err = carl9170_set_rts_cts_rate(ar); if (err) goto out; } if (changed & IEEE80211_CONF_CHANGE_POWER) { err = carl9170_set_mac_tpc(ar, ar->hw->conf.chandef.chan); if (err) goto out; } out: mutex_unlock(&ar->mutex); return err; } static u64 carl9170_op_prepare_multicast(struct ieee80211_hw *hw, struct netdev_hw_addr_list *mc_list) { struct netdev_hw_addr *ha; u64 mchash; /* always get broadcast frames */ mchash = 1ULL << (0xff >> 2); netdev_hw_addr_list_for_each(ha, mc_list) mchash |= 1ULL << (ha->addr[5] >> 2); return mchash; } static void carl9170_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, u64 multicast) { struct ar9170 *ar = hw->priv; /* mask supported flags */ *new_flags &= FIF_ALLMULTI | ar->rx_filter_caps; if (!IS_ACCEPTING_CMD(ar)) return; mutex_lock(&ar->mutex); ar->filter_state = *new_flags; /* * We can support more by setting the sniffer bit and * then checking the error flags, later. 
*/ if (*new_flags & FIF_ALLMULTI) multicast = ~0ULL; if (multicast != ar->cur_mc_hash) WARN_ON(carl9170_update_multicast(ar, multicast)); if (changed_flags & FIF_OTHER_BSS) { ar->sniffer_enabled = !!(*new_flags & FIF_OTHER_BSS); WARN_ON(carl9170_set_operating_mode(ar)); } if (ar->fw.rx_filter && changed_flags & ar->rx_filter_caps) { u32 rx_filter = 0; if (!ar->fw.ba_filter) rx_filter |= CARL9170_RX_FILTER_CTL_OTHER; if (!(*new_flags & (FIF_FCSFAIL | FIF_PLCPFAIL))) rx_filter |= CARL9170_RX_FILTER_BAD; if (!(*new_flags & FIF_CONTROL)) rx_filter |= CARL9170_RX_FILTER_CTL_OTHER; if (!(*new_flags & FIF_PSPOLL)) rx_filter |= CARL9170_RX_FILTER_CTL_PSPOLL; if (!(*new_flags & FIF_OTHER_BSS)) { rx_filter |= CARL9170_RX_FILTER_OTHER_RA; rx_filter |= CARL9170_RX_FILTER_DECRY_FAIL; } WARN_ON(carl9170_rx_filter(ar, rx_filter)); } mutex_unlock(&ar->mutex); } static void carl9170_op_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf, u64 changed) { struct ar9170 *ar = hw->priv; struct ath_common *common = &ar->common; int err = 0; struct carl9170_vif_info *vif_priv; struct ieee80211_vif *main_vif; mutex_lock(&ar->mutex); vif_priv = (void *) vif->drv_priv; main_vif = carl9170_get_main_vif(ar); if (WARN_ON(!main_vif)) goto out; if (changed & BSS_CHANGED_BEACON_ENABLED) { struct carl9170_vif_info *iter; int i = 0; vif_priv->enable_beacon = bss_conf->enable_beacon; rcu_read_lock(); list_for_each_entry_rcu(iter, &ar->vif_list, list) { if (iter->active && iter->enable_beacon) i++; } rcu_read_unlock(); ar->beacon_enabled = i; } if (changed & BSS_CHANGED_BEACON) { err = carl9170_update_beacon(ar, false); if (err) goto out; } if (changed & (BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_INT)) { if (main_vif != vif) { bss_conf->beacon_int = main_vif->bss_conf.beacon_int; bss_conf->dtim_period = main_vif->bss_conf.dtim_period; } /* * Therefore a hard limit for the broadcast traffic should * prevent false alarms. 
*/ if (vif->type != NL80211_IFTYPE_STATION && (bss_conf->beacon_int * bss_conf->dtim_period >= (CARL9170_QUEUE_STUCK_TIMEOUT / 2))) { err = -EINVAL; goto out; } err = carl9170_set_beacon_timers(ar); if (err) goto out; } if (changed & BSS_CHANGED_HT) { /* TODO */ err = 0; if (err) goto out; } if (main_vif != vif) goto out; /* * The following settings can only be changed by the * master interface. */ if (changed & BSS_CHANGED_BSSID) { memcpy(common->curbssid, bss_conf->bssid, ETH_ALEN); err = carl9170_set_operating_mode(ar); if (err) goto out; } if (changed & BSS_CHANGED_ASSOC) { ar->common.curaid = vif->cfg.aid; err = carl9170_set_beacon_timers(ar); if (err) goto out; } if (changed & BSS_CHANGED_ERP_SLOT) { err = carl9170_set_slot_time(ar); if (err) goto out; } if (changed & BSS_CHANGED_BASIC_RATES) { err = carl9170_set_mac_rates(ar); if (err) goto out; } out: WARN_ON_ONCE(err && IS_STARTED(ar)); mutex_unlock(&ar->mutex); } static u64 carl9170_op_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ar9170 *ar = hw->priv; struct carl9170_tsf_rsp tsf; int err; mutex_lock(&ar->mutex); err = carl9170_exec_cmd(ar, CARL9170_CMD_READ_TSF, 0, NULL, sizeof(tsf), &tsf); mutex_unlock(&ar->mutex); if (WARN_ON(err)) return 0; return le64_to_cpu(tsf.tsf_64); } static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key) { struct ar9170 *ar = hw->priv; int err = 0, i; u8 ktype; if (ar->disable_offload || !vif) return -EOPNOTSUPP; /* Fall back to software encryption whenever the driver is connected * to more than one network. * * This is very unfortunate, because some machines cannot handle * the high througput speed in 802.11n networks. */ if (!is_main_vif(ar, vif)) { mutex_lock(&ar->mutex); goto err_softw; } /* * While the hardware supports *catch-all* key, for offloading * group-key en-/de-cryption. 
The way of how the hardware * decides which keyId maps to which key, remains a mystery... */ if ((vif->type != NL80211_IFTYPE_STATION && vif->type != NL80211_IFTYPE_ADHOC) && !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) return -EOPNOTSUPP; switch (key->cipher) { case WLAN_CIPHER_SUITE_WEP40: ktype = AR9170_ENC_ALG_WEP64; break; case WLAN_CIPHER_SUITE_WEP104: ktype = AR9170_ENC_ALG_WEP128; break; case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; break; default: return -EOPNOTSUPP; } mutex_lock(&ar->mutex); if (cmd == SET_KEY) { if (!IS_STARTED(ar)) { err = -EOPNOTSUPP; goto out; } if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) { sta = NULL; i = 64 + key->keyidx; } else { for (i = 0; i < 64; i++) if (!(ar->usedkeys & BIT(i))) break; if (i == 64) goto err_softw; } key->hw_key_idx = i; err = carl9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 0, key->key, min_t(u8, 16, key->keylen)); if (err) goto out; if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = carl9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) goto out; /* * hardware is not capable generating MMIC * of fragmented frames! */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; } if (i < 64) ar->usedkeys |= BIT(i); key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; } else { if (!IS_STARTED(ar)) { /* The device is gone... 
together with the key ;-) */ err = 0; goto out; } if (key->hw_key_idx < 64) { ar->usedkeys &= ~BIT(key->hw_key_idx); } else { err = carl9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 0, NULL, 0); if (err) goto out; if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = carl9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, NULL, 0); if (err) goto out; } } err = carl9170_disable_key(ar, key->hw_key_idx); if (err) goto out; } out: mutex_unlock(&ar->mutex); return err; err_softw: if (!ar->rx_software_decryption) { ar->rx_software_decryption = true; carl9170_set_operating_mode(ar); } mutex_unlock(&ar->mutex); return -ENOSPC; } static int carl9170_op_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct carl9170_sta_info *sta_info = (void *) sta->drv_priv; unsigned int i; atomic_set(&sta_info->pending_frames, 0); if (sta->deflink.ht_cap.ht_supported) { if (sta->deflink.ht_cap.ampdu_density > 6) { /* * HW does support 16us AMPDU density. * No HT-Xmit for station. 
*/ return 0; } for (i = 0; i < ARRAY_SIZE(sta_info->agg); i++) RCU_INIT_POINTER(sta_info->agg[i], NULL); sta_info->ampdu_max_len = 1 << (3 + sta->deflink.ht_cap.ampdu_factor); sta_info->ht_sta = true; } return 0; } static int carl9170_op_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct ar9170 *ar = hw->priv; struct carl9170_sta_info *sta_info = (void *) sta->drv_priv; unsigned int i; bool cleanup = false; if (sta->deflink.ht_cap.ht_supported) { sta_info->ht_sta = false; rcu_read_lock(); for (i = 0; i < ARRAY_SIZE(sta_info->agg); i++) { struct carl9170_sta_tid *tid_info; tid_info = rcu_dereference(sta_info->agg[i]); RCU_INIT_POINTER(sta_info->agg[i], NULL); if (!tid_info) continue; spin_lock_bh(&ar->tx_ampdu_list_lock); if (tid_info->state > CARL9170_TID_STATE_SHUTDOWN) tid_info->state = CARL9170_TID_STATE_SHUTDOWN; spin_unlock_bh(&ar->tx_ampdu_list_lock); cleanup = true; } rcu_read_unlock(); if (cleanup) carl9170_ampdu_gc(ar); } return 0; } static int carl9170_op_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, u16 queue, const struct ieee80211_tx_queue_params *param) { struct ar9170 *ar = hw->priv; int ret; mutex_lock(&ar->mutex); memcpy(&ar->edcf[ar9170_qmap(queue)], param, sizeof(*param)); ret = carl9170_set_qos(ar); mutex_unlock(&ar->mutex); return ret; } static void carl9170_ampdu_work(struct work_struct *work) { struct ar9170 *ar = container_of(work, struct ar9170, ampdu_work); if (!IS_STARTED(ar)) return; mutex_lock(&ar->mutex); carl9170_ampdu_gc(ar); mutex_unlock(&ar->mutex); } static int carl9170_op_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params) { struct ieee80211_sta *sta = params->sta; enum ieee80211_ampdu_mlme_action action = params->action; u16 tid = params->tid; u16 *ssn = &params->ssn; struct ar9170 *ar = hw->priv; struct carl9170_sta_info *sta_info = (void *) sta->drv_priv; struct carl9170_sta_tid *tid_info; if 
(modparam_noht) return -EOPNOTSUPP; switch (action) { case IEEE80211_AMPDU_TX_START: if (!sta_info->ht_sta) return -EOPNOTSUPP; tid_info = kzalloc(sizeof(struct carl9170_sta_tid), GFP_KERNEL); if (!tid_info) return -ENOMEM; tid_info->hsn = tid_info->bsn = tid_info->snx = (*ssn); tid_info->state = CARL9170_TID_STATE_PROGRESS; tid_info->tid = tid; tid_info->max = sta_info->ampdu_max_len; tid_info->sta = sta; tid_info->vif = vif; INIT_LIST_HEAD(&tid_info->list); INIT_LIST_HEAD(&tid_info->tmp_list); skb_queue_head_init(&tid_info->queue); spin_lock_init(&tid_info->lock); spin_lock_bh(&ar->tx_ampdu_list_lock); ar->tx_ampdu_list_len++; list_add_tail_rcu(&tid_info->list, &ar->tx_ampdu_list); rcu_assign_pointer(sta_info->agg[tid], tid_info); spin_unlock_bh(&ar->tx_ampdu_list_lock); return IEEE80211_AMPDU_TX_START_IMMEDIATE; case IEEE80211_AMPDU_TX_STOP_CONT: case IEEE80211_AMPDU_TX_STOP_FLUSH: case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: rcu_read_lock(); tid_info = rcu_dereference(sta_info->agg[tid]); if (tid_info) { spin_lock_bh(&ar->tx_ampdu_list_lock); if (tid_info->state > CARL9170_TID_STATE_SHUTDOWN) tid_info->state = CARL9170_TID_STATE_SHUTDOWN; spin_unlock_bh(&ar->tx_ampdu_list_lock); } RCU_INIT_POINTER(sta_info->agg[tid], NULL); rcu_read_unlock(); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); ieee80211_queue_work(ar->hw, &ar->ampdu_work); break; case IEEE80211_AMPDU_TX_OPERATIONAL: rcu_read_lock(); tid_info = rcu_dereference(sta_info->agg[tid]); sta_info->stats[tid].clear = true; sta_info->stats[tid].req = false; if (tid_info) { bitmap_zero(tid_info->bitmap, CARL9170_BAW_SIZE); tid_info->state = CARL9170_TID_STATE_IDLE; } rcu_read_unlock(); if (WARN_ON_ONCE(!tid_info)) return -EFAULT; break; case IEEE80211_AMPDU_RX_START: case IEEE80211_AMPDU_RX_STOP: /* Handled by hardware */ break; default: return -EOPNOTSUPP; } return 0; } #ifdef CONFIG_CARL9170_WPC static int carl9170_register_wps_button(struct ar9170 *ar) { struct input_dev *input; int err; if 
(!(ar->features & CARL9170_WPS_BUTTON)) return 0; input = devm_input_allocate_device(&ar->udev->dev); if (!input) return -ENOMEM; snprintf(ar->wps.name, sizeof(ar->wps.name), "%s WPS Button", wiphy_name(ar->hw->wiphy)); snprintf(ar->wps.phys, sizeof(ar->wps.phys), "ieee80211/%s/input0", wiphy_name(ar->hw->wiphy)); input->name = ar->wps.name; input->phys = ar->wps.phys; input->id.bustype = BUS_USB; input->dev.parent = &ar->hw->wiphy->dev; input_set_capability(input, EV_KEY, KEY_WPS_BUTTON); err = input_register_device(input); if (err) return err; ar->wps.pbc = input; return 0; } #endif /* CONFIG_CARL9170_WPC */ #ifdef CONFIG_CARL9170_HWRNG static int carl9170_rng_get(struct ar9170 *ar) { #define RW (CARL9170_MAX_CMD_PAYLOAD_LEN / sizeof(u32)) #define RB (CARL9170_MAX_CMD_PAYLOAD_LEN) static const __le32 rng_load[RW] = { [0 ... (RW - 1)] = cpu_to_le32(AR9170_RAND_REG_NUM)}; u32 buf[RW]; unsigned int i, off = 0, transfer, count; int err; BUILD_BUG_ON(RB > CARL9170_MAX_CMD_PAYLOAD_LEN); if (!IS_ACCEPTING_CMD(ar)) return -EAGAIN; count = ARRAY_SIZE(ar->rng.cache); while (count) { err = carl9170_exec_cmd(ar, CARL9170_CMD_RREG, RB, (u8 *) rng_load, RB, (u8 *) buf); if (err) return err; transfer = min_t(unsigned int, count, RW); for (i = 0; i < transfer; i++) ar->rng.cache[off + i] = buf[i]; off += transfer; count -= transfer; } ar->rng.cache_idx = 0; #undef RW #undef RB return 0; } static int carl9170_rng_read(struct hwrng *rng, u32 *data) { struct ar9170 *ar = (struct ar9170 *)rng->priv; int ret = -EIO; mutex_lock(&ar->mutex); if (ar->rng.cache_idx >= ARRAY_SIZE(ar->rng.cache)) { ret = carl9170_rng_get(ar); if (ret) { mutex_unlock(&ar->mutex); return ret; } } *data = ar->rng.cache[ar->rng.cache_idx++]; mutex_unlock(&ar->mutex); return sizeof(u16); } static int carl9170_register_hwrng(struct ar9170 *ar) { int err; snprintf(ar->rng.name, ARRAY_SIZE(ar->rng.name), "%s_%s", KBUILD_MODNAME, wiphy_name(ar->hw->wiphy)); ar->rng.rng.name = ar->rng.name; ar->rng.rng.data_read = 
carl9170_rng_read; ar->rng.rng.priv = (unsigned long)ar; err = devm_hwrng_register(&ar->udev->dev, &ar->rng.rng); if (err) { dev_err(&ar->udev->dev, "Failed to register the random " "number generator (%d)\n", err); return err; } return carl9170_rng_get(ar); } #endif /* CONFIG_CARL9170_HWRNG */ static int carl9170_op_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) { struct ar9170 *ar = hw->priv; struct ieee80211_channel *chan; struct ieee80211_supported_band *band; int err, b, i; chan = ar->channel; if (!chan) return -ENODEV; if (idx == chan->hw_value) { mutex_lock(&ar->mutex); err = carl9170_update_survey(ar, false, true); mutex_unlock(&ar->mutex); if (err) return err; } for (b = 0; b < NUM_NL80211_BANDS; b++) { band = ar->hw->wiphy->bands[b]; if (!band) continue; for (i = 0; i < band->n_channels; i++) { if (band->channels[i].hw_value == idx) { chan = &band->channels[i]; goto found; } } } return -ENOENT; found: memcpy(survey, &ar->survey[idx], sizeof(*survey)); survey->channel = chan; survey->filled = SURVEY_INFO_NOISE_DBM; if (ar->channel == chan) survey->filled |= SURVEY_INFO_IN_USE; if (ar->fw.hw_counters) { survey->filled |= SURVEY_INFO_TIME | SURVEY_INFO_TIME_BUSY | SURVEY_INFO_TIME_TX; } return 0; } static void carl9170_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { struct ar9170 *ar = hw->priv; unsigned int vid; mutex_lock(&ar->mutex); for_each_set_bit(vid, &ar->vif_bitmap, ar->fw.vif_num) carl9170_flush_cab(ar, vid); carl9170_flush(ar, drop); mutex_unlock(&ar->mutex); } static int carl9170_op_get_stats(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats) { struct ar9170 *ar = hw->priv; memset(stats, 0, sizeof(*stats)); stats->dot11ACKFailureCount = ar->tx_ack_failures; stats->dot11FCSErrorCount = ar->tx_fcs_errors; return 0; } static void carl9170_op_sta_notify(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { struct 
carl9170_sta_info *sta_info = (void *) sta->drv_priv; switch (cmd) { case STA_NOTIFY_SLEEP: sta_info->sleeping = true; if (atomic_read(&sta_info->pending_frames)) ieee80211_sta_block_awake(hw, sta, true); break; case STA_NOTIFY_AWAKE: sta_info->sleeping = false; break; } } static bool carl9170_tx_frames_pending(struct ieee80211_hw *hw) { struct ar9170 *ar = hw->priv; return !!atomic_read(&ar->tx_total_queued); } static const struct ieee80211_ops carl9170_ops = { .add_chanctx = ieee80211_emulate_add_chanctx, .remove_chanctx = ieee80211_emulate_remove_chanctx, .change_chanctx = ieee80211_emulate_change_chanctx, .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = carl9170_op_start, .stop = carl9170_op_stop, .tx = carl9170_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .flush = carl9170_op_flush, .add_interface = carl9170_op_add_interface, .remove_interface = carl9170_op_remove_interface, .config = carl9170_op_config, .prepare_multicast = carl9170_op_prepare_multicast, .configure_filter = carl9170_op_configure_filter, .conf_tx = carl9170_op_conf_tx, .bss_info_changed = carl9170_op_bss_info_changed, .get_tsf = carl9170_op_get_tsf, .set_key = carl9170_op_set_key, .sta_add = carl9170_op_sta_add, .sta_remove = carl9170_op_sta_remove, .sta_notify = carl9170_op_sta_notify, .get_survey = carl9170_op_get_survey, .get_stats = carl9170_op_get_stats, .ampdu_action = carl9170_op_ampdu_action, .tx_frames_pending = carl9170_tx_frames_pending, }; void *carl9170_alloc(size_t priv_size) { struct ieee80211_hw *hw; struct ar9170 *ar; struct sk_buff *skb; int i; /* * this buffer is used for rx stream reconstruction. * Under heavy load this device (or the transport layer?) * tends to split the streams into separate rx descriptors. 
*/ skb = __dev_alloc_skb(AR9170_RX_STREAM_MAX_SIZE, GFP_KERNEL); if (!skb) goto err_nomem; hw = ieee80211_alloc_hw(priv_size, &carl9170_ops); if (!hw) goto err_nomem; ar = hw->priv; ar->hw = hw; ar->rx_failover = skb; memset(&ar->rx_plcp, 0, sizeof(struct ar9170_rx_head)); ar->rx_has_plcp = false; /* * Here's a hidden pitfall! * * All 4 AC queues work perfectly well under _legacy_ operation. * However as soon as aggregation is enabled, the traffic flow * gets very bumpy. Therefore we have to _switch_ to a * software AC with a single HW queue. */ hw->queues = __AR9170_NUM_TXQ; mutex_init(&ar->mutex); spin_lock_init(&ar->beacon_lock); spin_lock_init(&ar->cmd_lock); spin_lock_init(&ar->tx_stats_lock); spin_lock_init(&ar->tx_ampdu_list_lock); spin_lock_init(&ar->mem_lock); spin_lock_init(&ar->state_lock); atomic_set(&ar->pending_restarts, 0); ar->vifs = 0; for (i = 0; i < ar->hw->queues; i++) { skb_queue_head_init(&ar->tx_status[i]); skb_queue_head_init(&ar->tx_pending[i]); INIT_LIST_HEAD(&ar->bar_list[i]); spin_lock_init(&ar->bar_list_lock[i]); } INIT_WORK(&ar->ps_work, carl9170_ps_work); INIT_WORK(&ar->ping_work, carl9170_ping_work); INIT_WORK(&ar->restart_work, carl9170_restart_work); INIT_WORK(&ar->ampdu_work, carl9170_ampdu_work); INIT_DELAYED_WORK(&ar->stat_work, carl9170_stat_work); INIT_DELAYED_WORK(&ar->tx_janitor, carl9170_tx_janitor); INIT_LIST_HEAD(&ar->tx_ampdu_list); rcu_assign_pointer(ar->tx_ampdu_iter, (struct carl9170_sta_tid *) &ar->tx_ampdu_list); bitmap_zero(&ar->vif_bitmap, ar->fw.vif_num); INIT_LIST_HEAD(&ar->vif_list); init_completion(&ar->tx_flush); /* firmware decides which modes we support */ hw->wiphy->interface_modes = 0; ieee80211_hw_set(hw, RX_INCLUDES_FCS); ieee80211_hw_set(hw, MFP_CAPABLE); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(hw, SUPPORTS_PS); ieee80211_hw_set(hw, PS_NULLFUNC_STACK); ieee80211_hw_set(hw, NEED_DTIM_BEFORE_ASSOC); ieee80211_hw_set(hw, SUPPORTS_RC_TABLE); ieee80211_hw_set(hw, SIGNAL_DBM); 
ieee80211_hw_set(hw, SUPPORTS_HT_CCK_RATES); if (!modparam_noht) { /* * see the comment above, why we allow the user * to disable HT by a module parameter. */ ieee80211_hw_set(hw, AMPDU_AGGREGATION); } hw->extra_tx_headroom = sizeof(struct _carl9170_tx_superframe); hw->sta_data_size = sizeof(struct carl9170_sta_info); hw->vif_data_size = sizeof(struct carl9170_vif_info); hw->max_rates = CARL9170_TX_MAX_RATES; hw->max_rate_tries = CARL9170_TX_USER_RATE_TRIES; for (i = 0; i < ARRAY_SIZE(ar->noise); i++) ar->noise[i] = -95; /* ATH_DEFAULT_NOISE_FLOOR */ wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); return ar; err_nomem: kfree_skb(skb); return ERR_PTR(-ENOMEM); } static int carl9170_read_eeprom(struct ar9170 *ar) { #define RW 8 /* number of words to read at once */ #define RB (sizeof(u32) * RW) u8 *eeprom = (void *)&ar->eeprom; __le32 offsets[RW]; int i, j, err; BUILD_BUG_ON(sizeof(ar->eeprom) & 3); BUILD_BUG_ON(RB > CARL9170_MAX_CMD_LEN - 4); #ifndef __CHECKER__ /* don't want to handle trailing remains */ BUILD_BUG_ON(sizeof(ar->eeprom) % RB); #endif for (i = 0; i < sizeof(ar->eeprom) / RB; i++) { for (j = 0; j < RW; j++) offsets[j] = cpu_to_le32(AR9170_EEPROM_START + RB * i + 4 * j); err = carl9170_exec_cmd(ar, CARL9170_CMD_RREG, RB, (u8 *) &offsets, RB, eeprom + RB * i); if (err) return err; } #undef RW #undef RB return 0; } static int carl9170_parse_eeprom(struct ar9170 *ar) { struct ath_regulatory *regulatory = &ar->common.regulatory; unsigned int rx_streams, tx_streams, tx_params = 0; int bands = 0; int chans = 0; if (ar->eeprom.length == cpu_to_le16(0xffff)) return -ENODATA; rx_streams = hweight8(ar->eeprom.rx_mask); tx_streams = hweight8(ar->eeprom.tx_mask); if (rx_streams != tx_streams) { tx_params = IEEE80211_HT_MCS_TX_RX_DIFF; WARN_ON(!(tx_streams >= 1 && tx_streams <= IEEE80211_HT_MCS_TX_MAX_STREAMS)); tx_params |= (tx_streams - 1) << IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT; carl9170_band_2GHz.ht_cap.mcs.tx_params |= tx_params; 
carl9170_band_5GHz.ht_cap.mcs.tx_params |= tx_params; } if (ar->eeprom.operating_flags & AR9170_OPFLAG_2GHZ) { ar->hw->wiphy->bands[NL80211_BAND_2GHZ] = &carl9170_band_2GHz; chans += carl9170_band_2GHz.n_channels; bands++; } if (ar->eeprom.operating_flags & AR9170_OPFLAG_5GHZ) { ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = &carl9170_band_5GHz; chans += carl9170_band_5GHz.n_channels; bands++; } if (!bands) return -EINVAL; ar->survey = devm_kcalloc(&ar->udev->dev, chans, sizeof(struct survey_info), GFP_KERNEL); if (!ar->survey) return -ENOMEM; ar->num_channels = chans; regulatory->current_rd = le16_to_cpu(ar->eeprom.reg_domain[0]); /* second part of wiphy init */ SET_IEEE80211_PERM_ADDR(ar->hw, ar->eeprom.mac_address); return 0; } static void carl9170_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) { struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy); struct ar9170 *ar = hw->priv; ath_reg_notifier_apply(wiphy, request, &ar->common.regulatory); } int carl9170_register(struct ar9170 *ar) { struct ath_regulatory *regulatory = &ar->common.regulatory; int err = 0, i; ar->mem_bitmap = devm_bitmap_zalloc(&ar->udev->dev, ar->fw.mem_blocks, GFP_KERNEL); if (!ar->mem_bitmap) return -ENOMEM; /* try to read EEPROM, init MAC addr */ err = carl9170_read_eeprom(ar); if (err) return err; err = carl9170_parse_eeprom(ar); if (err) return err; err = ath_regd_init(regulatory, ar->hw->wiphy, carl9170_reg_notifier); if (err) return err; if (modparam_noht) { carl9170_band_2GHz.ht_cap.ht_supported = false; carl9170_band_5GHz.ht_cap.ht_supported = false; } for (i = 0; i < ar->fw.vif_num; i++) { ar->vif_priv[i].id = i; ar->vif_priv[i].vif = NULL; } err = ieee80211_register_hw(ar->hw); if (err) return err; /* mac80211 interface is now registered */ ar->registered = true; if (!ath_is_world_regd(regulatory)) regulatory_hint(ar->hw->wiphy, regulatory->alpha2); #ifdef CONFIG_CARL9170_DEBUGFS carl9170_debugfs_register(ar); #endif /* CONFIG_CARL9170_DEBUGFS */ err = 
carl9170_led_init(ar); if (err) goto err_unreg; #ifdef CONFIG_CARL9170_LEDS err = carl9170_led_register(ar); if (err) goto err_unreg; #endif /* CONFIG_CARL9170_LEDS */ #ifdef CONFIG_CARL9170_WPC err = carl9170_register_wps_button(ar); if (err) goto err_unreg; #endif /* CONFIG_CARL9170_WPC */ #ifdef CONFIG_CARL9170_HWRNG err = carl9170_register_hwrng(ar); if (err) goto err_unreg; #endif /* CONFIG_CARL9170_HWRNG */ dev_info(&ar->udev->dev, "Atheros AR9170 is registered as '%s'\n", wiphy_name(ar->hw->wiphy)); return 0; err_unreg: carl9170_unregister(ar); return err; } void carl9170_unregister(struct ar9170 *ar) { if (!ar->registered) return; ar->registered = false; #ifdef CONFIG_CARL9170_LEDS carl9170_led_unregister(ar); #endif /* CONFIG_CARL9170_LEDS */ #ifdef CONFIG_CARL9170_DEBUGFS carl9170_debugfs_unregister(ar); #endif /* CONFIG_CARL9170_DEBUGFS */ carl9170_cancel_worker(ar); cancel_work_sync(&ar->restart_work); ieee80211_unregister_hw(ar->hw); } void carl9170_free(struct ar9170 *ar) { WARN_ON(ar->registered); WARN_ON(IS_INITIALIZED(ar)); kfree_skb(ar->rx_failover); ar->rx_failover = NULL; mutex_destroy(&ar->mutex); ieee80211_free_hw(ar->hw); }
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM oom

#if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_OOM_H
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>

/* Convert a page count to KiB by shifting left by (PAGE_SHIFT - 10). */
#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10))

/* Records pid, comm and the oom_score_adj read from task->signal. */
TRACE_EVENT(oom_score_adj_update,

	TP_PROTO(struct task_struct *task),

	TP_ARGS(task),

	TP_STRUCT__entry(
		__field(	pid_t,	pid)
		__array(	char,	comm,	TASK_COMM_LEN )
		__field(	short,	oom_score_adj)
	),

	TP_fast_assign(
		__entry->pid = task->pid;
		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
		__entry->oom_score_adj = task->signal->oom_score_adj;
	),

	TP_printk("pid=%d comm=%s oom_score_adj=%hd",
		__entry->pid, __entry->comm, __entry->oom_score_adj)
);

/*
 * Records the node and zone index derived from @zoneref together with the
 * order, reclaimable/available counts, min watermark, no-progress loop
 * count and watermark check result passed in by the caller.
 */
TRACE_EVENT(reclaim_retry_zone,

	TP_PROTO(struct zoneref *zoneref,
		int order,
		unsigned long reclaimable,
		unsigned long available,
		unsigned long min_wmark,
		int no_progress_loops,
		bool wmark_check),

	TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check),

	TP_STRUCT__entry(
		__field(	int, node)
		__field(	int, zone_idx)
		__field(	int,	order)
		__field(	unsigned long,	reclaimable)
		__field(	unsigned long,	available)
		__field(	unsigned long,	min_wmark)
		__field(	int,	no_progress_loops)
		__field(	bool,	wmark_check)
	),

	TP_fast_assign(
		__entry->node = zonelist_node_idx(zoneref);
		__entry->zone_idx = zonelist_zone_idx(zoneref);
		__entry->order = order;
		__entry->reclaimable = reclaimable;
		__entry->available = available;
		__entry->min_wmark = min_wmark;
		__entry->no_progress_loops = no_progress_loops;
		__entry->wmark_check = wmark_check;
	),

	/* the zone index is printed symbolically via ZONE_TYPE */
	TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d",
			__entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE),
			__entry->order,
			__entry->reclaimable, __entry->available, __entry->min_wmark,
			__entry->no_progress_loops,
			__entry->wmark_check)
);

/*
 * Records a snapshot of @task: pid, comm, total_vm and the anon/file/shmem
 * RSS counters (all converted with PG_COUNT_TO_KB), the caller-supplied
 * uid, page-table size (bytes >> 10) and oom_score_adj.
 * task->mm is dereferenced without a NULL check here.
 */
TRACE_EVENT(mark_victim,
	TP_PROTO(struct task_struct *task, uid_t uid),

	TP_ARGS(task, uid),

	TP_STRUCT__entry(
		__field(int, pid)
		__string(comm, task->comm)
		__field(unsigned long, total_vm)
		__field(unsigned long, anon_rss)
		__field(unsigned long, file_rss)
		__field(unsigned long, shmem_rss)
		__field(uid_t, uid)
		__field(unsigned long, pgtables)
		__field(short, oom_score_adj)
	),

	TP_fast_assign(
		__entry->pid = task->pid;
		__assign_str(comm);
		__entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm);
		__entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES));
		__entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES));
		__entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES));
		__entry->uid = uid;
		__entry->pgtables = mm_pgtables_bytes(task->mm) >> 10;
		__entry->oom_score_adj = task->signal->oom_score_adj;
	),

	TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd",
		__entry->pid,
		__get_str(comm),
		__entry->total_vm,
		__entry->anon_rss,
		__entry->file_rss,
		__entry->shmem_rss,
		__entry->uid,
		__entry->pgtables,
		__entry->oom_score_adj
	)
);

/* Records only the pid passed by the caller. */
TRACE_EVENT(wake_reaper,
	TP_PROTO(int pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__field(int, pid)
	),

	TP_fast_assign(
		__entry->pid = pid;
	),

	TP_printk("pid=%d", __entry->pid)
);

/* Records only the pid passed by the caller. */
TRACE_EVENT(start_task_reaping,
	TP_PROTO(int pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__field(int, pid)
	),

	TP_fast_assign(
		__entry->pid = pid;
	),

	TP_printk("pid=%d", __entry->pid)
);

/* Records only the pid passed by the caller. */
TRACE_EVENT(finish_task_reaping,
	TP_PROTO(int pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__field(int, pid)
	),

	TP_fast_assign(
		__entry->pid = pid;
	),

	TP_printk("pid=%d", __entry->pid)
);

/* Records only the pid passed by the caller. */
TRACE_EVENT(skip_task_reaping,
	TP_PROTO(int pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__field(int, pid)
	),

	TP_fast_assign(
		__entry->pid = pid;
	),

	TP_printk("pid=%d", __entry->pid)
);

#ifdef CONFIG_COMPACTION
/*
 * Records one compaction retry decision. The stored result is not the raw
 * compact_result but its compact_result_to_feedback() mapping; priority
 * and result are printed symbolically.
 */
TRACE_EVENT(compact_retry,

	TP_PROTO(int order,
		enum compact_priority priority,
		enum compact_result result,
		int retries,
		int max_retries,
		bool ret),

	TP_ARGS(order, priority, result, retries, max_retries, ret),

	TP_STRUCT__entry(
		__field(	int, order)
		__field(	int, priority)
		__field(	int, result)
		__field(	int, retries)
		__field(	int, max_retries)
		__field(	bool, ret)
	),

	TP_fast_assign(
		__entry->order = order;
		__entry->priority = priority;
		__entry->result = compact_result_to_feedback(result);
		__entry->retries = retries;
		__entry->max_retries = max_retries;
		__entry->ret = ret;
	),

	TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d",
			__entry->order,
			__print_symbolic(__entry->priority, COMPACTION_PRIORITY),
			__print_symbolic(__entry->result, COMPACTION_FEEDBACK),
			__entry->retries, __entry->max_retries,
			__entry->ret)
);
#endif /* CONFIG_COMPACTION */
#endif

/* This part must be outside protection */
#include <trace/define_trace.h>
69 3 66 2 66 4 65 38 37 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/sctp.h>
#include <net/ip.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/sctp/checksum.h>
#include <net/ip_vs.h>

/* Forward declaration: used by the NAT handlers before its definition. */
static int
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);

/*
 * Decide whether an SCTP packet should create a new IPVS connection.
 *
 * For non-ICMP packets the SCTP header and the first chunk header are
 * read.  An ABORT chunk, or (unless sysctl_sloppy_sctp() is set) any
 * chunk other than INIT, makes the function return 1 without scheduling.
 * For ICMP packets only the two port fields are read.
 *
 * Returns 1 when the caller should carry on with the packet, or 0 with
 * *verdict filled in (NF_DROP, or whatever ip_vs_leave() returned).
 * On successful scheduling *cpp receives the new connection.
 */
static int
sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
		   struct ip_vs_proto_data *pd,
		   int *verdict, struct ip_vs_conn **cpp,
		   struct ip_vs_iphdr *iph)
{
	struct ip_vs_service *svc;
	struct sctp_chunkhdr _schunkh, *sch;
	struct sctphdr *sh, _sctph;
	__be16 _ports[2], *ports = NULL;

	if (likely(!ip_vs_iph_icmp(iph))) {
		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
		if (sh) {
			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
						 sizeof(_schunkh), &_schunkh);
			if (sch) {
				if (sch->type == SCTP_CID_ABORT ||
				    !(sysctl_sloppy_sctp(ipvs) ||
				      sch->type == SCTP_CID_INIT))
					return 1;
				/* ports[0] is the source port, ports[1] the
				 * field that follows it in the header.
				 */
				ports = &sh->source;
			}
		}
	} else {
		ports = skb_header_pointer(
			skb, iph->len, sizeof(_ports), &_ports);
	}

	/* Either header read failed: nothing to look a service up with. */
	if (!ports) {
		*verdict = NF_DROP;
		return 0;
	}

	/* Look the service up by destination, or by source when the header
	 * is flagged as inverse.
	 */
	if (likely(!ip_vs_iph_inverse(iph)))
		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
					 &iph->daddr, ports[1]);
	else
		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
					 &iph->saddr, ports[0]);
	if (svc) {
		int ignored;

		if (ip_vs_todrop(ipvs)) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			*verdict = NF_DROP;
			return 0;
		}
		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
		if (!*cpp && ignored <= 0) {
			/* ignored == 0: hand over to ip_vs_leave();
			 * ignored < 0: drop.
			 */
			if (!ignored)
				*verdict = ip_vs_leave(svc, skb, pd, iph);
			else
				*verdict = NF_DROP;
			return 0;
		}
	}
	/* NF_ACCEPT */
	return 1;
}

/*
 * Recompute the SCTP checksum starting at @sctphoff, store it in the
 * header and mark the skb as CHECKSUM_UNNECESSARY.
 */
static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
			  unsigned int sctphoff)
{
	sctph->checksum = sctp_compute_cksum(skb, sctphoff);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
}

/*
 * Source-NAT handler: rewrite the SCTP source port to cp->vport.
 *
 * Returns 1 on success (including the IPv6 fragment case, which is left
 * untouched) and 0 when the skb cannot be made writable, the checksum
 * check fails, or the application helper returns 0.
 */
static int
sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
	struct sctphdr *sctph;
	unsigned int sctphoff = iph->len;
	bool payload_csum = false;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6 && iph->fragoffs)
		return 1;
#endif

	/* csum_check requires unshared skb */
	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		int ret;

		/* Some checks before mangling */
		if (!sctp_csum_check(cp->af, skb, pp))
			return 0;

		/* Call application helper if needed */
		ret = ip_vs_app_pkt_out(cp, skb, iph);
		if (ret == 0)
			return 0;
		/* ret=2: csum update is needed after payload mangling */
		if (ret == 2)
			payload_csum = true;
	}

	/* Derived only after the calls above, which receive the skb */
	sctph = (void *) skb_network_header(skb) + sctphoff;

	/* Only update csum if we really have to */
	if (sctph->source != cp->vport || payload_csum ||
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		sctph->source = cp->vport;
		/* GSO skbs are not re-checksummed here */
		if (!skb_is_gso(skb))
			sctp_nat_csum(skb, sctph, sctphoff);
	} else {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	return 1;
}

/*
 * Destination-NAT handler: rewrite the SCTP destination port to cp->dport.
 *
 * Same return convention as sctp_snat_handler().  Unlike the SNAT path,
 * a CHECKSUM_PARTIAL skb only forces a recompute when the output device
 * does not advertise NETIF_F_SCTP_CRC, and ip_summed is left alone for
 * CHECKSUM_PARTIAL skbs that need no update.
 */
static int
sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
	struct sctphdr *sctph;
	unsigned int sctphoff = iph->len;
	bool payload_csum = false;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6 && iph->fragoffs)
		return 1;
#endif

	/* csum_check requires unshared skb */
	if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		int ret;

		/* Some checks before mangling */
		if (!sctp_csum_check(cp->af, skb, pp))
			return 0;

		/* Call application helper if needed */
		ret = ip_vs_app_pkt_in(cp, skb, iph);
		if (ret == 0)
			return 0;
		/* ret=2: csum update is needed after payload mangling */
		if (ret == 2)
			payload_csum = true;
	}

	/* Derived only after the calls above, which receive the skb */
	sctph = (void *) skb_network_header(skb) + sctphoff;

	/* Only update csum if we really have to */
	if (sctph->dest != cp->dport || payload_csum ||
	    (skb->ip_summed == CHECKSUM_PARTIAL &&
	     !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
		sctph->dest = cp->dport;
		/* GSO skbs are not re-checksummed here */
		if (!skb_is_gso(skb))
			sctp_nat_csum(skb, sctph, sctphoff);
	} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	return 1;
}

/*
 * Verify the SCTP checksum of @skb.
 *
 * The SCTP header offset is sizeof(struct ipv6hdr) for AF_INET6 (when
 * CONFIG_IP_VS_IPV6 is set) and ip_hdrlen(skb) otherwise.
 *
 * Returns 1 when the stored checksum equals the recomputed one, else 0.
 */
static int
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	unsigned int sctphoff;
	struct sctphdr *sh;
	__le32 cmp, val;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		sctphoff = sizeof(struct ipv6hdr);
	else
#endif
		sctphoff = ip_hdrlen(skb);

	/* Header is read directly from skb->data at the computed offset */
	sh = (struct sctphdr *)(skb->data + sctphoff);
	cmp = sh->checksum;
	val = sctp_compute_cksum(skb, sctphoff);

	if (val != cmp) {
		/* CRC failure, dump it. */
		IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
				"Failed checksum for");
		return 0;
	}
	return 1;
}

/*
 * Events fed into the state table; several chunk types share
 * IP_VS_SCTP_DATA (see sctp_events[] below).
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST
};

/* RFC 2960, 3.2 Chunk Field Descriptions */
/* Maps a chunk type (array index) to the event used by the state table. */
static __u8 sctp_events[] = {
	[SCTP_CID_DATA]			= IP_VS_SCTP_DATA,
	[SCTP_CID_INIT]			= IP_VS_SCTP_INIT,
	[SCTP_CID_INIT_ACK]		= IP_VS_SCTP_INIT_ACK,
	[SCTP_CID_SACK]			= IP_VS_SCTP_DATA,
	[SCTP_CID_HEARTBEAT]		= IP_VS_SCTP_DATA,
	[SCTP_CID_HEARTBEAT_ACK]	= IP_VS_SCTP_DATA,
	[SCTP_CID_ABORT]		= IP_VS_SCTP_ABORT,
	[SCTP_CID_SHUTDOWN]		= IP_VS_SCTP_SHUTDOWN,
	[SCTP_CID_SHUTDOWN_ACK]		= IP_VS_SCTP_SHUTDOWN_ACK,
	[SCTP_CID_ERROR]		= IP_VS_SCTP_ERROR,
	[SCTP_CID_COOKIE_ECHO]		= IP_VS_SCTP_COOKIE_ECHO,
	[SCTP_CID_COOKIE_ACK]		= IP_VS_SCTP_COOKIE_ACK,
	[SCTP_CID_ECN_ECNE]		= IP_VS_SCTP_DATA,
	[SCTP_CID_ECN_CWR]		= IP_VS_SCTP_DATA,
	[SCTP_CID_SHUTDOWN_COMPLETE]	= IP_VS_SCTP_SHUTDOWN_COMPLETE,
};

/* SCTP States:
 * See RFC 2960, 4. SCTP Association State Diagram
 *
 * New states (not in diagram):
 * - INIT1 state: use shorter timeout for dropped INIT packets
 * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
 * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
 *
 * The states are as seen in real server. In the diagram, INIT1, INIT,
 * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
*
 * States as per packets from client (C) and server (S):
 *
 * Setup of client connection:
 * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
 * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
 * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
 * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
 *
 * Setup of server connection:
 * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
 * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
 * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
 */

/* Short aliases so that the state table below fits on one line per event */
#define sNO IP_VS_SCTP_S_NONE
#define sI1 IP_VS_SCTP_S_INIT1
#define sIN IP_VS_SCTP_S_INIT
#define sCS IP_VS_SCTP_S_COOKIE_SENT
#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW IP_VS_SCTP_S_COOKIE_WAIT
#define sCO IP_VS_SCTP_S_COOKIE
#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
#define sES IP_VS_SCTP_S_ESTABLISHED
#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ IP_VS_SCTP_S_REJECTED
#define sCL IP_VS_SCTP_S_CLOSED

/*
 * Next-state table, indexed as [direction][event][current state].
 * Rows follow enum ipvs_sctp_event_t order; columns follow the state
 * order given in each header comment.
 */
static const __u8 sctp_states
	[IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
	{ /* INPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* OUTPUT */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
	{ /* INPUT-ONLY */
/*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
/* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
/* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
/* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
	},
};

#define IP_VS_SCTP_MAX_RTO	((60 + 1) * HZ)

/* Timeout table[state] */
/* Default per-state timeouts in jiffies; copied per netns at init time. */
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
	[IP_VS_SCTP_S_NONE]			= 2 * HZ,
	[IP_VS_SCTP_S_INIT1]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_INIT]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_REPLIED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_WAIT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_COOKIE_ECHOED]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_ESTABLISHED]		= 15 * 60 * HZ,
	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_REJECTED]			= (0 + 3 + 1) * HZ,
	[IP_VS_SCTP_S_CLOSED]			= IP_VS_SCTP_MAX_RTO,
	[IP_VS_SCTP_S_LAST]			= 2 * HZ,
};

/* Printable state names, indexed by state. */
static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
	[IP_VS_SCTP_S_NONE]			= "NONE",
	[IP_VS_SCTP_S_INIT1]			= "INIT1",
	[IP_VS_SCTP_S_INIT]			= "INIT",
	[IP_VS_SCTP_S_COOKIE_SENT]		= "C-SENT",
	[IP_VS_SCTP_S_COOKIE_REPLIED]		= "C-REPLIED",
	[IP_VS_SCTP_S_COOKIE_WAIT]		= "C-WAIT",
	[IP_VS_SCTP_S_COOKIE]			= "COOKIE",
	[IP_VS_SCTP_S_COOKIE_ECHOED]		= "C-ECHOED",
	[IP_VS_SCTP_S_ESTABLISHED]		= "ESTABLISHED",
	[IP_VS_SCTP_S_SHUTDOWN_SENT]		= "S-SENT",
	[IP_VS_SCTP_S_SHUTDOWN_RECEIVED]	= "S-RECEIVED",
	[IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]	= "S-ACK-SENT",
	[IP_VS_SCTP_S_REJECTED]			= "REJECTED",
	[IP_VS_SCTP_S_CLOSED]			= "CLOSED",
	[IP_VS_SCTP_S_LAST]			= "BUG!",
};

/*
 * Return the printable name of @state: "ERR!" for values at or above
 * IP_VS_SCTP_S_LAST, "?" for a state with no table entry.
 *
 * NOTE(review): a negative @state is not rejected here - confirm that
 * callers only pass non-negative values.
 */
static const char *sctp_state_name(int state)
{
	if (state >= IP_VS_SCTP_S_LAST)
		return "ERR!";
	if (sctp_state_name_table[state])
		return sctp_state_name_table[state];
	return "?";
}

/*
 * Advance the connection state machine for one packet.
 *
 * Reads the first chunk header (and, after a COOKIE ECHO / COOKIE ACK,
 * the following one in case it is an ABORT), maps the chunk type to an
 * event, looks up the next state and updates cp->state, cp->timeout and
 * the destination's active/inactive connection counters.
 * Returns silently when the chunk header cannot be read.
 */
static inline void
set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
		int direction, const struct sk_buff *skb)
{
	struct sctp_chunkhdr _sctpch, *sch;
	unsigned char chunk_type;
	int event, next_state;
	int ihl, cofs;

#ifdef CONFIG_IP_VS_IPV6
	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
#else
	ihl = ip_hdrlen(skb);
#endif

	/* Offset of the first chunk: right after the SCTP common header */
	cofs = ihl + sizeof(struct sctphdr);
	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
	if (sch == NULL)
		return;

	chunk_type = sch->type;
	/*
	 * Section 3: Multiple chunks can be bundled into one SCTP packet
	 * up to the MTU size, except for the INIT, INIT ACK, and
	 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
	 * any other chunk in a packet.
	 *
	 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
	 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
	 * bundled with an ABORT, but they MUST be placed before the ABORT
	 * in the SCTP packet or they will be ignored by the receiver.
	 */
	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
	    (sch->type == SCTP_CID_COOKIE_ACK)) {
		int clen = ntohs(sch->length);

		/* Only step over the chunk if its length covers its header */
		if (clen >= sizeof(_sctpch)) {
			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
						 sizeof(_sctpch), &_sctpch);
			if (sch && sch->type == SCTP_CID_ABORT)
				chunk_type = sch->type;
		}
	}

	/* Chunk types beyond the table are treated as DATA */
	event = (chunk_type < sizeof(sctp_events)) ?
		sctp_events[chunk_type] : IP_VS_SCTP_DATA;

	/* Update direction to INPUT_ONLY if necessary
	 * or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (direction == IP_VS_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			direction = IP_VS_DIR_INPUT_ONLY;
	}

	next_state = sctp_states[direction][event][cp->state];

	if (next_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		/* NOTE(review): pd is dereferenced here although the code
		 * further down still tests pd for NULL - confirm whether
		 * pd can actually be NULL.
		 */
		IP_VS_DBG_BUF(8, "%s %s %s:%d->"
				"%s:%d state: %s->%s conn->refcnt:%d\n",
				pd->pp->name,
				((direction == IP_VS_DIR_OUTPUT) ?
				 "output " : "input "),
				IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
				ntohs(cp->dport),
				IP_VS_DBG_ADDR(cp->af, &cp->caddr),
				ntohs(cp->cport),
				sctp_state_name(cp->state),
				sctp_state_name(next_state),
				refcount_read(&cp->refcnt));
		if (dest) {
			/* Move the connection between the destination's
			 * active and inactive counters: only ESTABLISHED
			 * counts as active.
			 */
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
				(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
		if (next_state == IP_VS_SCTP_S_ESTABLISHED)
			ip_vs_control_assure_ct(cp);
	}
	/* The assignment inside the index stores the new state as well */
	if (likely(pd))
		cp->timeout = pd->timeout_table[cp->state = next_state];
	else	/* What to do ? */
		cp->timeout = sctp_timeouts[cp->state = next_state];
}

/* Run set_sctp_state() with cp->lock held (bottom halves disabled). */
static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
		const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
	spin_lock_bh(&cp->lock);
	set_sctp_state(pd, cp, direction, skb);
	spin_unlock_bh(&cp->lock);
}

/* Fold @port into an index into the per-netns sctp_apps hash table. */
static inline __u16 sctp_app_hashkey(__be16 port)
{
	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
		& SCTP_APP_TAB_MASK;
}

/*
 * Register application incarnation @inc on its port.
 * Returns 0 on success or -EEXIST if the port already has an entry.
 */
static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash;
	__be16 port = inc->port;
	int ret = 0;
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);

	hash = sctp_app_hashkey(port);

	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
	atomic_inc(&pd->appcnt);
out:

	return ret;
}

/* Undo sctp_register_app(): drop the app count and unlink @inc. */
static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);

	atomic_dec(&pd->appcnt);
	list_del_rcu(&inc->p_list);
}

/*
 * Bind connection @cp to the application registered on cp->vport, if any.
 * Only done for the MASQ forwarding method.  Returns 0, or the result of
 * the application's init_conn() hook when it has one.
 */
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
	struct netns_ipvs *ipvs = cp->ipvs;
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;
	/* Lookup application incarnations and bind the right one */
	hash = sctp_app_hashkey(cp->vport);

	list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			/* Give up if a reference cannot be taken */
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;

			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
					"%s:%u to app %s on port %u\n",
					__func__,
					IP_VS_DBG_ADDR(cp->af, &cp->caddr),
					ntohs(cp->cport),
					IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
					ntohs(cp->vport),
					inc->name, ntohs(inc->port));
			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			break;
		}
	}

	return result;
}

/* ---------------------------------------------
 *   timeouts is netns related now.
 * ---------------------------------------------
 */
/*
 * Per-netns init: reset the app hash table and create this netns' copy
 * of the timeout table.  Returns 0 or -ENOMEM.
 */
static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
							sizeof(sctp_timeouts));
	if (!pd->timeout_table)
		return -ENOMEM;
	return 0;
}

/* Per-netns exit: free the timeout table created at init. */
static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
	kfree(pd->timeout_table);
}

/* Protocol descriptor wiring the handlers above into IPVS. */
struct ip_vs_protocol ip_vs_protocol_sctp = {
	.name		= "SCTP",
	.protocol	= IPPROTO_SCTP,
	.num_states	= IP_VS_SCTP_S_LAST,
	.dont_defrag	= 0,
	.init		= NULL,
	.exit		= NULL,
	.init_netns	= __ip_vs_sctp_init,
	.exit_netns	= __ip_vs_sctp_exit,
	.register_app	= sctp_register_app,
	.unregister_app = sctp_unregister_app,
	.conn_schedule	= sctp_conn_schedule,
	.conn_in_get	= ip_vs_conn_in_get_proto,
	.conn_out_get	= ip_vs_conn_out_get_proto,
	.snat_handler	= sctp_snat_handler,
	.dnat_handler	= sctp_dnat_handler,
	.state_name	= sctp_state_name,
	.state_transition = sctp_state_transition,
	.app_conn_bind	= sctp_app_conn_bind,
	.debug_packet	= ip_vs_tcpudp_debug_packet,
	.timeout_change	= NULL,
};
1 16 16 1 1 1 1 2 2 2 2 16 16 15 15 15 15 13 16 7 1 1 1 1 3 3 3 3 8 1 2 1 1 1 1 1 1 1 1 1 3 2 1 3 2 1 3 16 16 5 7 6 3 1 5 5 5 5 5 7 7 7 7 6 6 3 5 4 4 4 4 4 4 4 4 6 4 4 4 1 1 1 1 1 1 4 15 2 2 11 14 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 // SPDX-License-Identifier: GPL-2.0 /* Watch queue and general notification mechanism, built on pipes * * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/core-api/watch_queue.rst */ #define pr_fmt(fmt) "watchq: " fmt #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/printk.h> #include <linux/miscdevice.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/poll.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/file.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/sched/signal.h> #include <linux/watch_queue.h> #include <linux/pipe_fs_i.h> MODULE_DESCRIPTION("Watch queue"); MODULE_AUTHOR("Red Hat, Inc."); #define WATCH_QUEUE_NOTE_SIZE 128 #define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) /* * This must be called under the RCU read-lock, which makes * sure that the wqueue still exists. 
It can then take the lock, * and check that the wqueue hasn't been destroyed, which in * turn makes sure that the notification pipe still exists. */ static inline bool lock_wqueue(struct watch_queue *wqueue) { spin_lock_bh(&wqueue->lock); if (unlikely(!wqueue->pipe)) { spin_unlock_bh(&wqueue->lock); return false; } return true; } static inline void unlock_wqueue(struct watch_queue *wqueue) { spin_unlock_bh(&wqueue->lock); } static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct watch_queue *wqueue = (struct watch_queue *)buf->private; struct page *page; unsigned int bit; /* We need to work out which note within the page this refers to, but * the note might have been maximum size, so merely ANDing the offset * off doesn't work. OTOH, the note must've been more than zero size. */ bit = buf->offset + buf->len; if ((bit & (WATCH_QUEUE_NOTE_SIZE - 1)) == 0) bit -= WATCH_QUEUE_NOTE_SIZE; bit /= WATCH_QUEUE_NOTE_SIZE; page = buf->page; bit += page->private; set_bit(bit, wqueue->notes_bitmap); generic_pipe_buf_release(pipe, buf); } // No try_steal function => no stealing #define watch_queue_pipe_buf_try_steal NULL /* New data written to a pipe may be appended to a buffer with this type. */ static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { .release = watch_queue_pipe_buf_release, .try_steal = watch_queue_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; /* * Post a notification to a watch queue. * * Must be called with the RCU lock for reading, and the * watch_queue lock held, which guarantees that the pipe * hasn't been released. 
*/
/*
 * Copies @n into a free preallocated note slot and publishes it as a new
 * pipe buffer.  Returns true if the note was queued.  If the ring is full
 * or no note slot is free, the buffer before the head is flagged with
 * PIPE_BUF_FLAG_LOSS instead and false is returned.
 */
static bool post_one_notification(struct watch_queue *wqueue,
				  struct watch_notification *n)
{
	void *p;
	struct pipe_inode_info *pipe = wqueue->pipe;
	struct pipe_buffer *buf;
	struct page *page;
	unsigned int head, tail, note, offset, len;
	bool done = false;

	spin_lock_irq(&pipe->rd_wait.lock);

	head = pipe->head;
	tail = pipe->tail;
	if (pipe_full(head, tail, pipe->ring_size))
		goto lost;

	/* A set bit in notes_bitmap marks a free note slot */
	note = find_first_bit(wqueue->notes_bitmap, wqueue->nr_notes);
	if (note >= wqueue->nr_notes)
		goto lost;

	/* Locate the slot: page index, then byte offset within that page */
	page = wqueue->notes[note / WATCH_QUEUE_NOTES_PER_PAGE];
	offset = note % WATCH_QUEUE_NOTES_PER_PAGE * WATCH_QUEUE_NOTE_SIZE;
	get_page(page);
	len = n->info & WATCH_INFO_LENGTH;
	p = kmap_atomic(page);
	memcpy(p + offset, n, len);
	kunmap_atomic(p);

	/* Fill in the buffer completely before publishing the new head */
	buf = pipe_buf(pipe, head);
	buf->page = page;
	buf->private = (unsigned long)wqueue;
	buf->ops = &watch_queue_pipe_buf_ops;
	buf->offset = offset;
	buf->len = len;
	buf->flags = PIPE_BUF_FLAG_WHOLE;
	smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */

	/* The slot was found free above, so its bit must still be set */
	if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
		spin_unlock_irq(&pipe->rd_wait.lock);
		BUG();
	}
	wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	done = true;

out:
	spin_unlock_irq(&pipe->rd_wait.lock);
	/* Signal async readers only after the lock has been dropped */
	if (done)
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	return done;

lost:
	/* Record the loss on the buffer just before the head */
	buf = pipe_buf(pipe, head - 1);
	buf->flags |= PIPE_BUF_FLAG_LOSS;
	goto out;
}

/*
 * Apply filter rules to a notification.
*/ static bool filter_watch_notification(const struct watch_filter *wf, const struct watch_notification *n) { const struct watch_type_filter *wt; unsigned int st_bits = sizeof(wt->subtype_filter[0]) * 8; unsigned int st_index = n->subtype / st_bits; unsigned int st_bit = 1U << (n->subtype % st_bits); int i; if (!test_bit(n->type, wf->type_filter)) return false; for (i = 0; i < wf->nr_filters; i++) { wt = &wf->filters[i]; if (n->type == wt->type && (wt->subtype_filter[st_index] & st_bit) && (n->info & wt->info_mask) == wt->info_filter) return true; } return false; /* If there is a filter, the default is to reject. */ } /** * __post_watch_notification - Post an event notification * @wlist: The watch list to post the event to. * @n: The notification record to post. * @cred: The creds of the process that triggered the notification. * @id: The ID to match on the watch. * * Post a notification of an event into a set of watch queues and let the users * know. * * The size of the notification should be set in n->info & WATCH_INFO_LENGTH and * should be in units of sizeof(*n). */ void __post_watch_notification(struct watch_list *wlist, struct watch_notification *n, const struct cred *cred, u64 id) { const struct watch_filter *wf; struct watch_queue *wqueue; struct watch *watch; if (((n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT) == 0) { WARN_ON(1); return; } rcu_read_lock(); hlist_for_each_entry_rcu(watch, &wlist->watchers, list_node) { if (watch->id != id) continue; n->info &= ~WATCH_INFO_ID; n->info |= watch->info_id; wqueue = rcu_dereference(watch->queue); wf = rcu_dereference(wqueue->filter); if (wf && !filter_watch_notification(wf, n)) continue; if (security_post_notification(watch->cred, cred, n) < 0) continue; if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); unlock_wqueue(wqueue); } } rcu_read_unlock(); } EXPORT_SYMBOL(__post_watch_notification); /* * Allocate sufficient pages to preallocation for the requested number of * notifications. 
*/
/*
 * Returns 0 on success, -ENODEV if @pipe has no watch queue, -EBUSY if
 * notes were already allocated, -EINVAL if @nr_notes is outside 1..512,
 * -EPERM if the pipe-buffer limits are exceeded for an unprivileged user,
 * -ENOMEM on allocation failure, or the error from pipe_resize_ring().
 */
long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes)
{
	struct watch_queue *wqueue = pipe->watch_queue;
	struct page **pages;
	unsigned long *bitmap;
	unsigned long user_bufs;
	int ret, i, nr_pages;

	if (!wqueue)
		return -ENODEV;
	if (wqueue->notes)
		return -EBUSY;

	if (nr_notes < 1 ||
	    nr_notes > 512) /* TODO: choose a better hard limit */
		return -EINVAL;

	/* Round the request up to whole pages of notes */
	nr_pages = (nr_notes + WATCH_QUEUE_NOTES_PER_PAGE - 1);
	nr_pages /= WATCH_QUEUE_NOTES_PER_PAGE;
	/* Charge the user up front; the error path below reverses this */
	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_pages);

	if (nr_pages > pipe->max_usage &&
	    (too_many_pipe_buffers_hard(user_bufs) ||
	     too_many_pipe_buffers_soft(user_bufs)) &&
	    pipe_is_unprivileged_user()) {
		ret = -EPERM;
		goto error;
	}

	/* From here on nr_notes is the page-rounded slot count */
	nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE;
	ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes));
	if (ret < 0)
		goto error;

	/*
	 * pipe_resize_ring() does not update nr_accounted for watch_queue
	 * pipes, because the above vastly overprovisions. Set nr_accounted
	 * and max_usage on this pipe to the number that was actually charged
	 * to the user above via account_pipe_buffers.
	 */
	pipe->max_usage = nr_pages;
	pipe->nr_accounted = nr_pages;

	ret = -ENOMEM;
	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		goto error;

	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(GFP_KERNEL);
		if (!pages[i])
			goto error_p;
		/* Remember the index of the first note held by this page */
		pages[i]->private = i * WATCH_QUEUE_NOTES_PER_PAGE;
	}

	bitmap = bitmap_alloc(nr_notes, GFP_KERNEL);
	if (!bitmap)
		goto error_p;

	/* All bits set: every note slot starts out free */
	bitmap_fill(bitmap, nr_notes);
	wqueue->notes = pages;
	wqueue->notes_bitmap = bitmap;
	wqueue->nr_pages = nr_pages;
	wqueue->nr_notes = nr_notes;
	return 0;

error_p:
	/* i is one past the last page successfully allocated */
	while (--i >= 0)
		__free_page(pages[i]);
	kfree(pages);
error:
	(void) account_pipe_buffers(pipe->user, nr_pages, pipe->nr_accounted);
	return ret;
}

/*
 * Set the filter on a watch queue.
*/ long watch_queue_set_filter(struct pipe_inode_info *pipe, struct watch_notification_filter __user *_filter) { struct watch_notification_type_filter *tf; struct watch_notification_filter filter; struct watch_type_filter *q; struct watch_filter *wfilter; struct watch_queue *wqueue = pipe->watch_queue; int ret, nr_filter = 0, i; if (!wqueue) return -ENODEV; if (!_filter) { /* Remove the old filter */ wfilter = NULL; goto set; } /* Grab the user's filter specification */ if (copy_from_user(&filter, _filter, sizeof(filter)) != 0) return -EFAULT; if (filter.nr_filters == 0 || filter.nr_filters > 16 || filter.__reserved != 0) return -EINVAL; tf = memdup_array_user(_filter->filters, filter.nr_filters, sizeof(*tf)); if (IS_ERR(tf)) return PTR_ERR(tf); ret = -EINVAL; for (i = 0; i < filter.nr_filters; i++) { if ((tf[i].info_filter & ~tf[i].info_mask) || tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } /* Now we need to build the internal filter from only the relevant * user-specified filters. 
*/ ret = -ENOMEM; wfilter = kzalloc(struct_size(wfilter, filters, nr_filter), GFP_KERNEL); if (!wfilter) goto err_filter; wfilter->nr_filters = nr_filter; q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; q->info_filter = tf[i].info_filter; q->info_mask = tf[i].info_mask; q->subtype_filter[0] = tf[i].subtype_filter[0]; __set_bit(q->type, wfilter->type_filter); q++; } kfree(tf); set: pipe_lock(pipe); wfilter = rcu_replace_pointer(wqueue->filter, wfilter, lockdep_is_held(&pipe->mutex)); pipe_unlock(pipe); if (wfilter) kfree_rcu(wfilter, rcu); return 0; err_filter: kfree(tf); return ret; } static void __put_watch_queue(struct kref *kref) { struct watch_queue *wqueue = container_of(kref, struct watch_queue, usage); struct watch_filter *wfilter; int i; for (i = 0; i < wqueue->nr_pages; i++) __free_page(wqueue->notes[i]); kfree(wqueue->notes); bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); if (wfilter) kfree_rcu(wfilter, rcu); kfree_rcu(wqueue, rcu); } /** * put_watch_queue - Dispose of a ref on a watchqueue. * @wqueue: The watch queue to unref. */ void put_watch_queue(struct watch_queue *wqueue) { kref_put(&wqueue->usage, __put_watch_queue); } EXPORT_SYMBOL(put_watch_queue); static void free_watch(struct rcu_head *rcu) { struct watch *watch = container_of(rcu, struct watch, rcu); put_watch_queue(rcu_access_pointer(watch->queue)); atomic_dec(&watch->cred->user->nr_watches); put_cred(watch->cred); kfree(watch); } static void __put_watch(struct kref *kref) { struct watch *watch = container_of(kref, struct watch, usage); call_rcu(&watch->rcu, free_watch); } /* * Discard a watch. */ static void put_watch(struct watch *watch) { kref_put(&watch->usage, __put_watch); } /** * init_watch - Initialise a watch * @watch: The watch to initialise. * @wqueue: The queue to assign. * * Initialise a watch and set the watch queue. 
*/ void init_watch(struct watch *watch, struct watch_queue *wqueue) { kref_init(&watch->usage); INIT_HLIST_NODE(&watch->list_node); INIT_HLIST_NODE(&watch->queue_node); rcu_assign_pointer(watch->queue, wqueue); } static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) { const struct cred *cred; struct watch *w; hlist_for_each_entry(w, &wlist->watchers, list_node) { struct watch_queue *wq = rcu_access_pointer(w->queue); if (wqueue == wq && watch->id == w->id) return -EBUSY; } cred = current_cred(); if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { atomic_dec(&cred->user->nr_watches); return -EAGAIN; } watch->cred = get_cred(cred); rcu_assign_pointer(watch->watch_list, wlist); kref_get(&wqueue->usage); kref_get(&watch->usage); hlist_add_head(&watch->queue_node, &wqueue->watches); hlist_add_head_rcu(&watch->list_node, &wlist->watchers); return 0; } /** * add_watch_to_object - Add a watch on an object to a watch list * @watch: The watch to add * @wlist: The watch list to add to * * @watch->queue must have been set to point to the queue to post notifications * to and the watch list of the object to be watched. @watch->cred must also * have been set to the appropriate credentials and a ref taken on them. * * The caller must pin the queue and the list both and must hold the list * locked against racing watch additions/removals. */ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) { struct watch_queue *wqueue; int ret = -ENOENT; rcu_read_lock(); wqueue = rcu_access_pointer(watch->queue); if (lock_wqueue(wqueue)) { spin_lock(&wlist->lock); ret = add_one_watch(watch, wlist, wqueue); spin_unlock(&wlist->lock); unlock_wqueue(wqueue); } rcu_read_unlock(); return ret; } EXPORT_SYMBOL(add_watch_to_object); /** * remove_watch_from_object - Remove a watch or all watches from an object. 
* @wlist: The watch list to remove from
 * @wq: The watch queue of interest (ignored if @all is true)
 * @id: The ID of the watch to remove (ignored if @all is true)
 * @all: True to remove all objects
 *
 * Remove a specific watch or all watches from an object.  A notification is
 * sent to the watcher to tell them that this happened.
 *
 * Returns 0 if at least one watch was removed, -EBADSLT otherwise.
 */
int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq,
			     u64 id, bool all)
{
	struct watch_notification_removal n;
	struct watch_queue *wqueue;
	struct watch *watch;
	int ret = -EBADSLT;

	rcu_read_lock();

again:
	/* Find the next watch to remove under the list lock */
	spin_lock(&wlist->lock);
	hlist_for_each_entry(watch, &wlist->watchers, list_node) {
		if (all ||
		    (watch->id == id && rcu_access_pointer(watch->queue) == wq))
			goto found;
	}
	spin_unlock(&wlist->lock);
	goto out;

found:
	ret = 0;
	hlist_del_init_rcu(&watch->list_node);
	rcu_assign_pointer(watch->watch_list, NULL);
	spin_unlock(&wlist->lock);

	/* We now own the reference on watch that used to belong to wlist. */

	/* Build the removal record; the longer form carrying n.id is only
	 * used when @id is non-zero.
	 */
	n.watch.type = WATCH_TYPE_META;
	n.watch.subtype = WATCH_META_REMOVAL_NOTIFICATION;
	n.watch.info = watch->info_id | watch_sizeof(n.watch);
	n.id = id;
	if (id != 0)
		n.watch.info = watch->info_id | watch_sizeof(n);

	wqueue = rcu_dereference(watch->queue);

	if (lock_wqueue(wqueue)) {
		post_one_notification(wqueue, &n.watch);

		/* Unlink from the queue too, unless that already happened */
		if (!hlist_unhashed(&watch->queue_node)) {
			hlist_del_init_rcu(&watch->queue_node);
			put_watch(watch);
		}

		unlock_wqueue(wqueue);
	}

	if (wlist->release_watch) {
		void (*release_watch)(struct watch *);

		/* Call the hook with the RCU read lock dropped */
		release_watch = wlist->release_watch;
		rcu_read_unlock();
		(*release_watch)(watch);
		rcu_read_lock();
	}
	put_watch(watch);

	/* When removing everything, rescan the list from the start */
	if (all && !hlist_empty(&wlist->watchers))
		goto again;
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(remove_watch_from_object);

/*
 * Remove all the watches that are contributory to a queue.  This has the
 * potential to race with removal of the watches by the destruction of the
 * objects being watched or with the distribution of notifications.
*/
void watch_queue_clear(struct watch_queue *wqueue)
{
	struct watch_list *wlist;
	struct watch *watch;
	bool release;

	rcu_read_lock();
	spin_lock_bh(&wqueue->lock);

	/*
	 * This pipe can be freed by callers like free_pipe_info().
	 * Removing this reference also prevents new notifications.
	 */
	wqueue->pipe = NULL;

	/* Pop watches one at a time; the queue lock is dropped per iteration. */
	while (!hlist_empty(&wqueue->watches)) {
		watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
		hlist_del_init_rcu(&watch->queue_node);
		/* We now own a ref on the watch. */
		spin_unlock_bh(&wqueue->lock);

		/* We can't do the next bit under the queue lock as we need to
		 * get the list lock - which would cause a deadlock if someone
		 * was removing from the opposite direction at the same time or
		 * posting a notification.
		 */
		wlist = rcu_dereference(watch->watch_list);
		if (wlist) {
			void (*release_watch)(struct watch *);

			spin_lock(&wlist->lock);

			/* Only unlink from the list if nobody beat us to it. */
			release = !hlist_unhashed(&watch->list_node);
			if (release) {
				hlist_del_init_rcu(&watch->list_node);
				rcu_assign_pointer(watch->watch_list, NULL);

				/* We now own a second ref on the watch. */
			}

			/* Sample the callback while the list lock is held. */
			release_watch = wlist->release_watch;
			spin_unlock(&wlist->lock);

			if (release) {
				if (release_watch) {
					rcu_read_unlock();
					/* This might need to call dput(), so
					 * we have to drop all the locks.
					 */
					(*release_watch)(watch);
					rcu_read_lock();
				}
				/* Drop the ref taken over from the list. */
				put_watch(watch);
			}
		}

		/* Drop the ref taken over from the queue. */
		put_watch(watch);
		spin_lock_bh(&wqueue->lock);
	}

	spin_unlock_bh(&wqueue->lock);
	rcu_read_unlock();
}

/**
 * get_watch_queue - Get a watch queue from its file descriptor.
 * @fd: The fd to query.
*/ struct watch_queue *get_watch_queue(int fd) { struct pipe_inode_info *pipe; struct watch_queue *wqueue = ERR_PTR(-EINVAL); CLASS(fd, f)(fd); if (!fd_empty(f)) { pipe = get_pipe_info(fd_file(f), false); if (pipe && pipe->watch_queue) { wqueue = pipe->watch_queue; kref_get(&wqueue->usage); } } return wqueue; } EXPORT_SYMBOL(get_watch_queue); /* * Initialise a watch queue */ int watch_queue_init(struct pipe_inode_info *pipe) { struct watch_queue *wqueue; wqueue = kzalloc(sizeof(*wqueue), GFP_KERNEL); if (!wqueue) return -ENOMEM; wqueue->pipe = pipe; kref_init(&wqueue->usage); spin_lock_init(&wqueue->lock); INIT_HLIST_HEAD(&wqueue->watches); pipe->watch_queue = wqueue; return 0; }
2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 // SPDX-License-Identifier: GPL-2.0 // // Register map access API - I2C support // // Copyright 2011 Wolfson Microelectronics plc // // Author: Mark Brown <broonie@opensource.wolfsonmicro.com> #include <linux/regmap.h> #include <linux/i2c.h> #include <linux/module.h> #include "internal.h" static int regmap_smbus_byte_reg_read(void *context, unsigned int reg, unsigned int *val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret; if (reg > 0xff) return 
-EINVAL; ret = i2c_smbus_read_byte_data(i2c, reg); if (ret < 0) return ret; *val = ret; return 0; } static int regmap_smbus_byte_reg_write(void *context, unsigned int reg, unsigned int val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); if (val > 0xff || reg > 0xff) return -EINVAL; return i2c_smbus_write_byte_data(i2c, reg, val); } static const struct regmap_bus regmap_smbus_byte = { .reg_write = regmap_smbus_byte_reg_write, .reg_read = regmap_smbus_byte_reg_read, }; static int regmap_smbus_word_reg_read(void *context, unsigned int reg, unsigned int *val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret; if (reg > 0xff) return -EINVAL; ret = i2c_smbus_read_word_data(i2c, reg); if (ret < 0) return ret; *val = ret; return 0; } static int regmap_smbus_word_reg_write(void *context, unsigned int reg, unsigned int val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); if (val > 0xffff || reg > 0xff) return -EINVAL; return i2c_smbus_write_word_data(i2c, reg, val); } static const struct regmap_bus regmap_smbus_word = { .reg_write = regmap_smbus_word_reg_write, .reg_read = regmap_smbus_word_reg_read, }; static int regmap_smbus_word_read_swapped(void *context, unsigned int reg, unsigned int *val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret; if (reg > 0xff) return -EINVAL; ret = i2c_smbus_read_word_swapped(i2c, reg); if (ret < 0) return ret; *val = ret; return 0; } static int regmap_smbus_word_write_swapped(void *context, unsigned int reg, unsigned int val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); if (val > 0xffff || reg > 0xff) return -EINVAL; return i2c_smbus_write_word_swapped(i2c, reg, val); } static const struct regmap_bus regmap_smbus_word_swapped = { .reg_write = regmap_smbus_word_write_swapped, .reg_read = regmap_smbus_word_read_swapped, }; static int regmap_i2c_write(void *context, const void *data, 
size_t count) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret; ret = i2c_master_send(i2c, data, count); if (ret == count) return 0; else if (ret < 0) return ret; else return -EIO; } static int regmap_i2c_gather_write(void *context, const void *reg, size_t reg_size, const void *val, size_t val_size) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); struct i2c_msg xfer[2]; int ret; /* If the I2C controller can't do a gather tell the core, it * will substitute in a linear write for us. */ if (!i2c_check_functionality(i2c->adapter, I2C_FUNC_NOSTART)) return -ENOTSUPP; xfer[0].addr = i2c->addr; xfer[0].flags = 0; xfer[0].len = reg_size; xfer[0].buf = (void *)reg; xfer[1].addr = i2c->addr; xfer[1].flags = I2C_M_NOSTART; xfer[1].len = val_size; xfer[1].buf = (void *)val; ret = i2c_transfer(i2c->adapter, xfer, 2); if (ret == 2) return 0; if (ret < 0) return ret; else return -EIO; } static int regmap_i2c_read(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); struct i2c_msg xfer[2]; int ret; xfer[0].addr = i2c->addr; xfer[0].flags = 0; xfer[0].len = reg_size; xfer[0].buf = (void *)reg; xfer[1].addr = i2c->addr; xfer[1].flags = I2C_M_RD; xfer[1].len = val_size; xfer[1].buf = val; ret = i2c_transfer(i2c->adapter, xfer, 2); if (ret == 2) return 0; else if (ret < 0) return ret; else return -EIO; } static const struct regmap_bus regmap_i2c = { .write = regmap_i2c_write, .gather_write = regmap_i2c_gather_write, .read = regmap_i2c_read, .reg_format_endian_default = REGMAP_ENDIAN_BIG, .val_format_endian_default = REGMAP_ENDIAN_BIG, }; static int regmap_i2c_smbus_i2c_write(void *context, const void *data, size_t count) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); if (count < 1) return -EINVAL; --count; return i2c_smbus_write_i2c_block_data(i2c, ((u8 *)data)[0], count, ((u8 *)data + 1)); 
} static int regmap_i2c_smbus_i2c_read(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret; if (reg_size != 1 || val_size < 1) return -EINVAL; ret = i2c_smbus_read_i2c_block_data(i2c, ((u8 *)reg)[0], val_size, val); if (ret == val_size) return 0; else if (ret < 0) return ret; else return -EIO; } static const struct regmap_bus regmap_i2c_smbus_i2c_block = { .write = regmap_i2c_smbus_i2c_write, .read = regmap_i2c_smbus_i2c_read, .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1, .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1, }; static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data, size_t count) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); if (count < 2) return -EINVAL; count--; return i2c_smbus_write_i2c_block_data(i2c, ((u8 *)data)[0], count, (u8 *)data + 1); } static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg, size_t reg_size, void *val, size_t val_size) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); int ret, count, len = val_size; if (reg_size != 2) return -EINVAL; ret = i2c_smbus_write_byte_data(i2c, ((u16 *)reg)[0] & 0xff, ((u16 *)reg)[0] >> 8); if (ret < 0) return ret; count = 0; do { /* Current Address Read */ ret = i2c_smbus_read_byte(i2c); if (ret < 0) break; *((u8 *)val++) = ret; count++; len--; } while (len > 0); if (count == val_size) return 0; else if (ret < 0) return ret; else return -EIO; } static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = { .write = regmap_i2c_smbus_i2c_write_reg16, .read = regmap_i2c_smbus_i2c_read_reg16, .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2, .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2, }; static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, const struct regmap_config *config) { const struct i2c_adapter_quirks *quirks; const struct regmap_bus *bus = NULL; struct regmap_bus *ret_bus; u16 max_read 
= 0, max_write = 0; if (i2c_check_functionality(i2c->adapter, I2C_FUNC_I2C)) bus = &regmap_i2c; else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) bus = &regmap_i2c_smbus_i2c_block; else if (config->val_bits == 8 && config->reg_bits == 16 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) bus = &regmap_i2c_smbus_i2c_block_reg16; else if (config->val_bits == 16 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_WORD_DATA)) switch (regmap_get_val_endian(&i2c->dev, NULL, config)) { case REGMAP_ENDIAN_LITTLE: bus = &regmap_smbus_word; break; case REGMAP_ENDIAN_BIG: bus = &regmap_smbus_word_swapped; break; default: /* everything else is not supported */ break; } else if (config->val_bits == 8 && config->reg_bits == 8 && i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) bus = &regmap_smbus_byte; if (!bus) return ERR_PTR(-ENOTSUPP); quirks = i2c->adapter->quirks; if (quirks) { if (quirks->max_read_len && (bus->max_raw_read == 0 || bus->max_raw_read > quirks->max_read_len)) max_read = quirks->max_read_len; if (quirks->max_write_len && (bus->max_raw_write == 0 || bus->max_raw_write > quirks->max_write_len)) max_write = quirks->max_write_len - (config->reg_bits + config->pad_bits) / BITS_PER_BYTE; if (max_read || max_write) { ret_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); if (!ret_bus) return ERR_PTR(-ENOMEM); ret_bus->free_on_exit = true; ret_bus->max_raw_read = max_read; ret_bus->max_raw_write = max_write; bus = ret_bus; } } return bus; } struct regmap *__regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name) { const struct regmap_bus *bus = regmap_get_i2c_bus(i2c, config); if (IS_ERR(bus)) return ERR_CAST(bus); return __regmap_init(&i2c->dev, bus, &i2c->dev, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_i2c); struct regmap 
*__devm_regmap_init_i2c(struct i2c_client *i2c, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name) { const struct regmap_bus *bus = regmap_get_i2c_bus(i2c, config); if (IS_ERR(bus)) return ERR_CAST(bus); return __devm_regmap_init(&i2c->dev, bus, &i2c->dev, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_i2c); MODULE_DESCRIPTION("Register map access API - I2C support"); MODULE_LICENSE("GPL");
3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 /* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/mac80211/trace.h */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac802154 #if !defined(__MAC802154_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) #define __MAC802154_DRIVER_TRACE #include <linux/tracepoint.h> #include <net/mac802154.h> #include "ieee802154_i.h" #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME) #define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(local->hw.phy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wpan_phy_name #define CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \ __field(enum nl802154_cca_opts, cca_opt) #define CCA_ASSIGN \ do { \ (__entry->cca_mode) = cca->mode; \ (__entry->cca_opt) = cca->opt; \ } while (0) #define CCA_PR_FMT "cca_mode: %d, cca_opt: %d" #define CCA_PR_ARG __entry->cca_mode, __entry->cca_opt #define BOOL_TO_STR(bo) (bo) ? 
"true" : "false" /* Tracing for driver callbacks */ DECLARE_EVENT_CLASS(local_only_evt4, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; ), TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); DEFINE_EVENT(local_only_evt4, 802154_drv_return_void, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); TRACE_EVENT(802154_drv_return_int, TP_PROTO(struct ieee802154_local *local, int ret), TP_ARGS(local, ret), TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), TP_printk(LOCAL_PR_FMT ", returned: %d", LOCAL_PR_ARG, __entry->ret) ); DEFINE_EVENT(local_only_evt4, 802154_drv_start, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); DEFINE_EVENT(local_only_evt4, 802154_drv_stop, TP_PROTO(struct ieee802154_local *local), TP_ARGS(local) ); TRACE_EVENT(802154_drv_set_channel, TP_PROTO(struct ieee802154_local *local, u8 page, u8 channel), TP_ARGS(local, page, channel), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, page) __field(u8, channel) ), TP_fast_assign( LOCAL_ASSIGN; __entry->page = page; __entry->channel = channel; ), TP_printk(LOCAL_PR_FMT ", page: %d, channel: %d", LOCAL_PR_ARG, __entry->page, __entry->channel) ); TRACE_EVENT(802154_drv_set_cca_mode, TP_PROTO(struct ieee802154_local *local, const struct wpan_phy_cca *cca), TP_ARGS(local, cca), TP_STRUCT__entry( LOCAL_ENTRY CCA_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; CCA_ASSIGN; ), TP_printk(LOCAL_PR_FMT ", " CCA_PR_FMT, LOCAL_PR_ARG, CCA_PR_ARG) ); TRACE_EVENT(802154_drv_set_cca_ed_level, TP_PROTO(struct ieee802154_local *local, s32 mbm), TP_ARGS(local, mbm), TP_STRUCT__entry( LOCAL_ENTRY __field(s32, mbm) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mbm = mbm; ), TP_printk(LOCAL_PR_FMT ", ed level: %d", LOCAL_PR_ARG, __entry->mbm) ); TRACE_EVENT(802154_drv_set_tx_power, TP_PROTO(struct ieee802154_local *local, s32 power), TP_ARGS(local, power), TP_STRUCT__entry( LOCAL_ENTRY __field(s32, power) ), 
TP_fast_assign( LOCAL_ASSIGN; __entry->power = power; ), TP_printk(LOCAL_PR_FMT ", mbm: %d", LOCAL_PR_ARG, __entry->power) ); TRACE_EVENT(802154_drv_set_lbt_mode, TP_PROTO(struct ieee802154_local *local, bool mode), TP_ARGS(local, mode), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, mode) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mode = mode; ), TP_printk(LOCAL_PR_FMT ", lbt mode: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->mode)) ); TRACE_EVENT(802154_drv_set_short_addr, TP_PROTO(struct ieee802154_local *local, __le16 short_addr), TP_ARGS(local, short_addr), TP_STRUCT__entry( LOCAL_ENTRY __field(__le16, short_addr) ), TP_fast_assign( LOCAL_ASSIGN; __entry->short_addr = short_addr; ), TP_printk(LOCAL_PR_FMT ", short addr: 0x%04x", LOCAL_PR_ARG, le16_to_cpu(__entry->short_addr)) ); TRACE_EVENT(802154_drv_set_pan_id, TP_PROTO(struct ieee802154_local *local, __le16 pan_id), TP_ARGS(local, pan_id), TP_STRUCT__entry( LOCAL_ENTRY __field(__le16, pan_id) ), TP_fast_assign( LOCAL_ASSIGN; __entry->pan_id = pan_id; ), TP_printk(LOCAL_PR_FMT ", pan id: 0x%04x", LOCAL_PR_ARG, le16_to_cpu(__entry->pan_id)) ); TRACE_EVENT(802154_drv_set_extended_addr, TP_PROTO(struct ieee802154_local *local, __le64 extended_addr), TP_ARGS(local, extended_addr), TP_STRUCT__entry( LOCAL_ENTRY __field(__le64, extended_addr) ), TP_fast_assign( LOCAL_ASSIGN; __entry->extended_addr = extended_addr; ), TP_printk(LOCAL_PR_FMT ", extended addr: 0x%llx", LOCAL_PR_ARG, le64_to_cpu(__entry->extended_addr)) ); TRACE_EVENT(802154_drv_set_pan_coord, TP_PROTO(struct ieee802154_local *local, bool is_coord), TP_ARGS(local, is_coord), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, is_coord) ), TP_fast_assign( LOCAL_ASSIGN; __entry->is_coord = is_coord; ), TP_printk(LOCAL_PR_FMT ", is_coord: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->is_coord)) ); TRACE_EVENT(802154_drv_set_csma_params, TP_PROTO(struct ieee802154_local *local, u8 min_be, u8 max_be, u8 max_csma_backoffs), TP_ARGS(local, min_be, max_be, max_csma_backoffs), 
TP_STRUCT__entry( LOCAL_ENTRY __field(u8, min_be) __field(u8, max_be) __field(u8, max_csma_backoffs) ), TP_fast_assign( LOCAL_ASSIGN, __entry->min_be = min_be; __entry->max_be = max_be; __entry->max_csma_backoffs = max_csma_backoffs; ), TP_printk(LOCAL_PR_FMT ", min be: %d, max be: %d, max csma backoffs: %d", LOCAL_PR_ARG, __entry->min_be, __entry->max_be, __entry->max_csma_backoffs) ); TRACE_EVENT(802154_drv_set_max_frame_retries, TP_PROTO(struct ieee802154_local *local, s8 max_frame_retries), TP_ARGS(local, max_frame_retries), TP_STRUCT__entry( LOCAL_ENTRY __field(s8, max_frame_retries) ), TP_fast_assign( LOCAL_ASSIGN; __entry->max_frame_retries = max_frame_retries; ), TP_printk(LOCAL_PR_FMT ", max frame retries: %d", LOCAL_PR_ARG, __entry->max_frame_retries) ); TRACE_EVENT(802154_drv_set_promiscuous_mode, TP_PROTO(struct ieee802154_local *local, bool on), TP_ARGS(local, on), TP_STRUCT__entry( LOCAL_ENTRY __field(bool, on) ), TP_fast_assign( LOCAL_ASSIGN; __entry->on = on; ), TP_printk(LOCAL_PR_FMT ", promiscuous mode: %s", LOCAL_PR_ARG, BOOL_TO_STR(__entry->on)) ); TRACE_EVENT(802154_new_scan_event, TP_PROTO(struct ieee802154_coord_desc *desc), TP_ARGS(desc), TP_STRUCT__entry( __field(__le16, pan_id) __field(__le64, addr) __field(u8, channel) __field(u8, page) ), TP_fast_assign( __entry->page = desc->page; __entry->channel = desc->channel; __entry->pan_id = desc->addr.pan_id; __entry->addr = desc->addr.extended_addr; ), TP_printk("panid: %u, coord_addr: 0x%llx, page: %u, channel: %u", __le16_to_cpu(__entry->pan_id), __le64_to_cpu(__entry->addr), __entry->page, __entry->channel) ); DEFINE_EVENT(802154_new_scan_event, 802154_scan_event, TP_PROTO(struct ieee802154_coord_desc *desc), TP_ARGS(desc) ); #endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h>
2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 // SPDX-License-Identifier: GPL-2.0-or-later /* * Surface2.0/SUR40/PixelSense input driver * * Copyright (c) 2014 by Florian 'floe' Echtler <floe@butterbrot.org> * * Derived from the USB Skeleton driver 1.1, * Copyright (c) 2003 Greg Kroah-Hartman (greg@kroah.com) * * and from the Apple USB BCM5974 multitouch driver, * Copyright (c) 2008 Henrik Rydberg (rydberg@euromail.se) * * and from the generic hid-multitouch driver, * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr> * * and from the v4l2-pci-skeleton driver, * Copyright (c) Copyright 2014 Cisco Systems, Inc. 
*/

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
#include <linux/printk.h>
#include <linux/input.h>
#include <linux/input/mt.h>
#include <linux/usb/input.h>
#include <linux/videodev2.h>
#include <media/v4l2-device.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-ioctl.h>
#include <media/v4l2-ctrls.h>
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-dma-sg.h>

/*
 * Header of a touch data packet, as sent by the device (little-endian,
 * packed).
 */
/* read 512 bytes from endpoint 0x86 -> get header + blobs */
struct sur40_header {

	__le16 type;       /* always 0x0001 */
	__le16 count;      /* count of blobs (if 0: continue prev. packet) */

	__le32 packet_id;  /* unique ID for all packets in one frame */

	__le32 timestamp;  /* milliseconds (inc. by 16 or 17 each frame) */
	__le32 unknown;    /* "epoch?" always 02/03 00 00 00 */

} __packed;

/* One detected object (blob, touch or tag) inside a touch data packet. */
struct sur40_blob {

	__le16 blob_id;

	u8 action;         /* 0x02 = enter/exit, 0x03 = update (?) */
	u8 type;           /* bitmask (0x01 blob,  0x02 touch, 0x04 tag) */

	__le16 bb_pos_x;   /* upper left corner of bounding box */
	__le16 bb_pos_y;

	__le16 bb_size_x;  /* size of bounding box */
	__le16 bb_size_y;

	__le16 pos_x;      /* finger tip position */
	__le16 pos_y;

	__le16 ctr_x;      /* centroid position */
	__le16 ctr_y;

	__le16 axis_x;     /* somehow related to major/minor axis, mostly: */
	__le16 axis_y;     /* axis_x == bb_size_y && axis_y == bb_size_x */

	__le32 angle;      /* orientation in radians relative to x axis -
	                      actually an IEEE754 float, don't use in kernel */

	__le32 area;       /* size in pixels/pressure (?) */

	u8 padding[24];

	__le32 tag_id;     /* valid when type == 0x04 (SUR40_TAG) */
	__le32 unknown;

} __packed;

/* combined header/blob data */
struct sur40_data {
	struct sur40_header header;
	struct sur40_blob   blobs[];   /* flexible array of blob records */
} __packed;

/* Header preceding the raw image data on the video endpoint. */
/* read 512 bytes from endpoint 0x82 -> get header below
 * continue reading 16k blocks until header.size bytes read */
struct sur40_image_header {
	__le32 magic;     /* "SUBF" */
	__le32 packet_id;
	__le32 size;      /* always 0x0007e900 = 960x540 */
	__le32 timestamp; /* milliseconds (increases by 16 or 17 each frame) */
	__le32 unknown;   /* "epoch?" always 02/03 00 00 00 */
} __packed;

/* version information */
#define DRIVER_SHORT   "sur40"
#define DRIVER_LONG    "Samsung SUR40"
#define DRIVER_AUTHOR  "Florian 'floe' Echtler <floe@butterbrot.org>"
#define DRIVER_DESC    "Surface2.0/SUR40/PixelSense input driver"

/* vendor and device IDs */
#define ID_MICROSOFT 0x045e
#define ID_SUR40     0x0775

/* sensor resolution */
#define SENSOR_RES_X 1920
#define SENSOR_RES_Y 1080

/* touch data endpoint */
#define TOUCH_ENDPOINT 0x86

/* video data endpoint */
#define VIDEO_ENDPOINT 0x82

/* video header fields */
#define VIDEO_HEADER_MAGIC 0x46425553
#define VIDEO_PACKET_SIZE  16384

/* polling interval (ms) */
#define POLL_INTERVAL 1

/* maximum number of contacts FIXME: this is a guess? */
#define MAX_CONTACTS 64

/* control commands (vendor request codes; note POKE and GET_STATE share 0xc5) */
#define SUR40_GET_VERSION 0xb0 /* 12 bytes string    */
#define SUR40_ACCEL_CAPS  0xb3 /*  5 bytes           */
#define SUR40_SENSOR_CAPS 0xc1 /* 24 bytes           */

#define SUR40_POKE        0xc5 /* poke register byte */
#define SUR40_PEEK        0xc4 /* 48 bytes registers */

#define SUR40_GET_STATE   0xc5 /*  4 bytes state (?)
*/ #define SUR40_GET_SENSORS 0xb1 /* 8 bytes sensors */ #define SUR40_BLOB 0x01 #define SUR40_TOUCH 0x02 #define SUR40_TAG 0x04 /* video controls */ #define SUR40_BRIGHTNESS_MAX 0xff #define SUR40_BRIGHTNESS_MIN 0x00 #define SUR40_BRIGHTNESS_DEF 0xff #define SUR40_CONTRAST_MAX 0x0f #define SUR40_CONTRAST_MIN 0x00 #define SUR40_CONTRAST_DEF 0x0a #define SUR40_GAIN_MAX 0x09 #define SUR40_GAIN_MIN 0x00 #define SUR40_GAIN_DEF 0x08 #define SUR40_BACKLIGHT_MAX 0x01 #define SUR40_BACKLIGHT_MIN 0x00 #define SUR40_BACKLIGHT_DEF 0x01 #define sur40_str(s) #s #define SUR40_PARAM_RANGE(lo, hi) " (range " sur40_str(lo) "-" sur40_str(hi) ")" /* module parameters */ static uint brightness = SUR40_BRIGHTNESS_DEF; module_param(brightness, uint, 0644); MODULE_PARM_DESC(brightness, "set initial brightness" SUR40_PARAM_RANGE(SUR40_BRIGHTNESS_MIN, SUR40_BRIGHTNESS_MAX)); static uint contrast = SUR40_CONTRAST_DEF; module_param(contrast, uint, 0644); MODULE_PARM_DESC(contrast, "set initial contrast" SUR40_PARAM_RANGE(SUR40_CONTRAST_MIN, SUR40_CONTRAST_MAX)); static uint gain = SUR40_GAIN_DEF; module_param(gain, uint, 0644); MODULE_PARM_DESC(gain, "set initial gain" SUR40_PARAM_RANGE(SUR40_GAIN_MIN, SUR40_GAIN_MAX)); static const struct v4l2_pix_format sur40_pix_format[] = { { .pixelformat = V4L2_TCH_FMT_TU08, .width = SENSOR_RES_X / 2, .height = SENSOR_RES_Y / 2, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_RAW, .bytesperline = SENSOR_RES_X / 2, .sizeimage = (SENSOR_RES_X/2) * (SENSOR_RES_Y/2), }, { .pixelformat = V4L2_PIX_FMT_GREY, .width = SENSOR_RES_X / 2, .height = SENSOR_RES_Y / 2, .field = V4L2_FIELD_NONE, .colorspace = V4L2_COLORSPACE_RAW, .bytesperline = SENSOR_RES_X / 2, .sizeimage = (SENSOR_RES_X/2) * (SENSOR_RES_Y/2), } }; /* master device state */ struct sur40_state { struct usb_device *usbdev; struct device *dev; struct input_dev *input; struct v4l2_device v4l2; struct video_device vdev; struct mutex lock; struct v4l2_pix_format pix_fmt; struct v4l2_ctrl_handler 
hdl; struct vb2_queue queue; struct list_head buf_list; spinlock_t qlock; int sequence; struct sur40_data *bulk_in_buffer; size_t bulk_in_size; u8 bulk_in_epaddr; u8 vsvideo; char phys[64]; }; struct sur40_buffer { struct vb2_v4l2_buffer vb; struct list_head list; }; /* forward declarations */ static const struct video_device sur40_video_device; static const struct vb2_queue sur40_queue; static void sur40_process_video(struct sur40_state *sur40); static int sur40_s_ctrl(struct v4l2_ctrl *ctrl); static const struct v4l2_ctrl_ops sur40_ctrl_ops = { .s_ctrl = sur40_s_ctrl, }; /* * Note: an earlier, non-public version of this driver used USB_RECIP_ENDPOINT * here by mistake which is very likely to have corrupted the firmware EEPROM * on two separate SUR40 devices. Thanks to Alan Stern who spotted this bug. * Should you ever run into a similar problem, the background story to this * incident and instructions on how to fix the corrupted EEPROM are available * at https://floe.butterbrot.org/matrix/hacking/surface/brick.html */ /* command wrapper */ static int sur40_command(struct sur40_state *dev, u8 command, u16 index, void *buffer, u16 size) { return usb_control_msg(dev->usbdev, usb_rcvctrlpipe(dev->usbdev, 0), command, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x00, index, buffer, size, 1000); } /* poke a byte in the panel register space */ static int sur40_poke(struct sur40_state *dev, u8 offset, u8 value) { int result; u8 index = 0x96; // 0xae for permanent write result = usb_control_msg(dev->usbdev, usb_sndctrlpipe(dev->usbdev, 0), SUR40_POKE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x32, index, NULL, 0, 1000); if (result < 0) goto error; msleep(5); result = usb_control_msg(dev->usbdev, usb_sndctrlpipe(dev->usbdev, 0), SUR40_POKE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x72, offset, NULL, 0, 1000); if (result < 0) goto error; msleep(5); result = usb_control_msg(dev->usbdev, usb_sndctrlpipe(dev->usbdev, 0), SUR40_POKE, USB_TYPE_VENDOR | 
USB_RECIP_DEVICE | USB_DIR_OUT, 0xb2, value, NULL, 0, 1000); if (result < 0) goto error; msleep(5); error: return result; } static int sur40_set_preprocessor(struct sur40_state *dev, u8 value) { u8 setting_07[2] = { 0x01, 0x00 }; u8 setting_17[2] = { 0x85, 0x80 }; int result; if (value > 1) return -ERANGE; result = usb_control_msg(dev->usbdev, usb_sndctrlpipe(dev->usbdev, 0), SUR40_POKE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x07, setting_07[value], NULL, 0, 1000); if (result < 0) goto error; msleep(5); result = usb_control_msg(dev->usbdev, usb_sndctrlpipe(dev->usbdev, 0), SUR40_POKE, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x17, setting_17[value], NULL, 0, 1000); if (result < 0) goto error; msleep(5); error: return result; } static void sur40_set_vsvideo(struct sur40_state *handle, u8 value) { int i; for (i = 0; i < 4; i++) sur40_poke(handle, 0x1c+i, value); handle->vsvideo = value; } static void sur40_set_irlevel(struct sur40_state *handle, u8 value) { int i; for (i = 0; i < 8; i++) sur40_poke(handle, 0x08+(2*i), value); } /* Initialization routine, called from sur40_open */ static int sur40_init(struct sur40_state *dev) { int result; u8 *buffer; buffer = kmalloc(24, GFP_KERNEL); if (!buffer) { result = -ENOMEM; goto error; } /* stupidly replay the original MS driver init sequence */ result = sur40_command(dev, SUR40_GET_VERSION, 0x00, buffer, 12); if (result < 0) goto error; result = sur40_command(dev, SUR40_GET_VERSION, 0x01, buffer, 12); if (result < 0) goto error; result = sur40_command(dev, SUR40_GET_VERSION, 0x02, buffer, 12); if (result < 0) goto error; result = sur40_command(dev, SUR40_SENSOR_CAPS, 0x00, buffer, 24); if (result < 0) goto error; result = sur40_command(dev, SUR40_ACCEL_CAPS, 0x00, buffer, 5); if (result < 0) goto error; result = sur40_command(dev, SUR40_GET_VERSION, 0x03, buffer, 12); if (result < 0) goto error; result = 0; /* * Discard the result buffer - no known data inside except * some version strings, maybe 
extract these sometime... */ error: kfree(buffer); return result; } /* * Callback routines from input_dev */ /* Enable the device, polling will now start. */ static int sur40_open(struct input_dev *input) { struct sur40_state *sur40 = input_get_drvdata(input); dev_dbg(sur40->dev, "open\n"); return sur40_init(sur40); } /* Disable device, polling has stopped. */ static void sur40_close(struct input_dev *input) { struct sur40_state *sur40 = input_get_drvdata(input); dev_dbg(sur40->dev, "close\n"); /* * There is no known way to stop the device, so we simply * stop polling. */ } /* * This function is called when a whole contact has been processed, * so that it can assign it to a slot and store the data there. */ static void sur40_report_blob(struct sur40_blob *blob, struct input_dev *input) { int wide, major, minor; int bb_size_x, bb_size_y, pos_x, pos_y, ctr_x, ctr_y, slotnum; if (blob->type != SUR40_TOUCH) return; slotnum = input_mt_get_slot_by_key(input, le16_to_cpu(blob->blob_id)); if (slotnum < 0 || slotnum >= MAX_CONTACTS) return; bb_size_x = le16_to_cpu(blob->bb_size_x); bb_size_y = le16_to_cpu(blob->bb_size_y); pos_x = le16_to_cpu(blob->pos_x); pos_y = le16_to_cpu(blob->pos_y); ctr_x = le16_to_cpu(blob->ctr_x); ctr_y = le16_to_cpu(blob->ctr_y); input_mt_slot(input, slotnum); input_mt_report_slot_state(input, MT_TOOL_FINGER, 1); wide = (bb_size_x > bb_size_y); major = max(bb_size_x, bb_size_y); minor = min(bb_size_x, bb_size_y); input_report_abs(input, ABS_MT_POSITION_X, pos_x); input_report_abs(input, ABS_MT_POSITION_Y, pos_y); input_report_abs(input, ABS_MT_TOOL_X, ctr_x); input_report_abs(input, ABS_MT_TOOL_Y, ctr_y); /* TODO: use a better orientation measure */ input_report_abs(input, ABS_MT_ORIENTATION, wide); input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); } /* core function: poll for new input data */ static void sur40_poll(struct input_dev *input) { struct sur40_state *sur40 = 
input_get_drvdata(input); int result, bulk_read, need_blobs, packet_blobs, i; struct sur40_header *header = &sur40->bulk_in_buffer->header; struct sur40_blob *inblob = &sur40->bulk_in_buffer->blobs[0]; dev_dbg(sur40->dev, "poll\n"); need_blobs = -1; do { /* perform a blocking bulk read to get data from the device */ result = usb_bulk_msg(sur40->usbdev, usb_rcvbulkpipe(sur40->usbdev, sur40->bulk_in_epaddr), sur40->bulk_in_buffer, sur40->bulk_in_size, &bulk_read, 1000); dev_dbg(sur40->dev, "received %d bytes\n", bulk_read); if (result < 0) { dev_err(sur40->dev, "error in usb_bulk_read\n"); return; } result = bulk_read - sizeof(struct sur40_header); if (result % sizeof(struct sur40_blob) != 0) { dev_err(sur40->dev, "transfer size mismatch\n"); return; } /* first packet? */ if (need_blobs == -1) { need_blobs = le16_to_cpu(header->count); dev_dbg(sur40->dev, "need %d blobs\n", need_blobs); /* packet_id = le32_to_cpu(header->packet_id); */ } /* * Sanity check. when video data is also being retrieved, the * packet ID will usually increase in the middle of a series * instead of at the end. However, the data is still consistent, * so the packet ID is probably just valid for the first packet * in a series. 
if (packet_id != le32_to_cpu(header->packet_id)) dev_dbg(sur40->dev, "packet ID mismatch\n"); */ packet_blobs = result / sizeof(struct sur40_blob); dev_dbg(sur40->dev, "received %d blobs\n", packet_blobs); /* packets always contain at least 4 blobs, even if empty */ if (packet_blobs > need_blobs) packet_blobs = need_blobs; for (i = 0; i < packet_blobs; i++) { need_blobs--; dev_dbg(sur40->dev, "processing blob\n"); sur40_report_blob(&(inblob[i]), input); } } while (need_blobs > 0); input_mt_sync_frame(input); input_sync(input); sur40_process_video(sur40); } /* deal with video data */ static void sur40_process_video(struct sur40_state *sur40) { struct sur40_image_header *img = (void *)(sur40->bulk_in_buffer); struct sur40_buffer *new_buf; struct usb_sg_request sgr; struct sg_table *sgt; int result, bulk_read; if (!vb2_start_streaming_called(&sur40->queue)) return; /* get a new buffer from the list */ spin_lock(&sur40->qlock); if (list_empty(&sur40->buf_list)) { dev_dbg(sur40->dev, "buffer queue empty\n"); spin_unlock(&sur40->qlock); return; } new_buf = list_entry(sur40->buf_list.next, struct sur40_buffer, list); list_del(&new_buf->list); spin_unlock(&sur40->qlock); dev_dbg(sur40->dev, "buffer acquired\n"); /* retrieve data via bulk read */ result = usb_bulk_msg(sur40->usbdev, usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT), sur40->bulk_in_buffer, sur40->bulk_in_size, &bulk_read, 1000); if (result < 0) { dev_err(sur40->dev, "error in usb_bulk_read\n"); goto err_poll; } if (bulk_read != sizeof(struct sur40_image_header)) { dev_err(sur40->dev, "received %d bytes (%zd expected)\n", bulk_read, sizeof(struct sur40_image_header)); goto err_poll; } if (le32_to_cpu(img->magic) != VIDEO_HEADER_MAGIC) { dev_err(sur40->dev, "image magic mismatch\n"); goto err_poll; } if (le32_to_cpu(img->size) != sur40->pix_fmt.sizeimage) { dev_err(sur40->dev, "image size mismatch\n"); goto err_poll; } dev_dbg(sur40->dev, "header acquired\n"); sgt = vb2_dma_sg_plane_desc(&new_buf->vb.vb2_buf, 0); 
result = usb_sg_init(&sgr, sur40->usbdev, usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT), 0, sgt->sgl, sgt->nents, sur40->pix_fmt.sizeimage, 0); if (result < 0) { dev_err(sur40->dev, "error %d in usb_sg_init\n", result); goto err_poll; } usb_sg_wait(&sgr); if (sgr.status < 0) { dev_err(sur40->dev, "error %d in usb_sg_wait\n", sgr.status); goto err_poll; } dev_dbg(sur40->dev, "image acquired\n"); /* return error if streaming was stopped in the meantime */ if (sur40->sequence == -1) return; /* mark as finished */ new_buf->vb.vb2_buf.timestamp = ktime_get_ns(); new_buf->vb.sequence = sur40->sequence++; new_buf->vb.field = V4L2_FIELD_NONE; vb2_buffer_done(&new_buf->vb.vb2_buf, VB2_BUF_STATE_DONE); dev_dbg(sur40->dev, "buffer marked done\n"); return; err_poll: vb2_buffer_done(&new_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); } /* Initialize input device parameters. */ static int sur40_input_setup_events(struct input_dev *input_dev) { int error; input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0, SENSOR_RES_X, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0, SENSOR_RES_Y, 0, 0); input_set_abs_params(input_dev, ABS_MT_TOOL_X, 0, SENSOR_RES_X, 0, 0); input_set_abs_params(input_dev, ABS_MT_TOOL_Y, 0, SENSOR_RES_Y, 0, 0); /* max value unknown, but major/minor axis * can never be larger than screen */ input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, SENSOR_RES_X, 0, 0); input_set_abs_params(input_dev, ABS_MT_TOUCH_MINOR, 0, SENSOR_RES_Y, 0, 0); input_set_abs_params(input_dev, ABS_MT_ORIENTATION, 0, 1, 0, 0); error = input_mt_init_slots(input_dev, MAX_CONTACTS, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); if (error) { dev_err(input_dev->dev.parent, "failed to set up slots\n"); return error; } return 0; } /* Check candidate USB interface. 
*/ static int sur40_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *usbdev = interface_to_usbdev(interface); struct sur40_state *sur40; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; struct input_dev *input; int error; /* Check if we really have the right interface. */ iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; if (iface_desc->desc.bNumEndpoints < 5) return -ENODEV; /* Use endpoint #4 (0x86). */ endpoint = &iface_desc->endpoint[4].desc; if (endpoint->bEndpointAddress != TOUCH_ENDPOINT) return -ENODEV; /* Allocate memory for our device state and initialize it. */ sur40 = kzalloc(sizeof(*sur40), GFP_KERNEL); if (!sur40) return -ENOMEM; input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_free_dev; } /* initialize locks/lists */ INIT_LIST_HEAD(&sur40->buf_list); spin_lock_init(&sur40->qlock); mutex_init(&sur40->lock); /* Set up regular input device structure */ input->name = DRIVER_LONG; usb_to_input_id(usbdev, &input->id); usb_make_path(usbdev, sur40->phys, sizeof(sur40->phys)); strlcat(sur40->phys, "/input0", sizeof(sur40->phys)); input->phys = sur40->phys; input->dev.parent = &interface->dev; input->open = sur40_open; input->close = sur40_close; error = sur40_input_setup_events(input); if (error) goto err_free_input; input_set_drvdata(input, sur40); error = input_setup_polling(input, sur40_poll); if (error) { dev_err(&interface->dev, "failed to set up polling"); goto err_free_input; } input_set_poll_interval(input, POLL_INTERVAL); sur40->usbdev = usbdev; sur40->dev = &interface->dev; sur40->input = input; /* use the bulk-in endpoint tested above */ sur40->bulk_in_size = usb_endpoint_maxp(endpoint); sur40->bulk_in_epaddr = endpoint->bEndpointAddress; sur40->bulk_in_buffer = kmalloc(sur40->bulk_in_size, GFP_KERNEL); if (!sur40->bulk_in_buffer) { dev_err(&interface->dev, "Unable to allocate input buffer."); error 
= -ENOMEM; goto err_free_input; } /* register the polled input device */ error = input_register_device(input); if (error) { dev_err(&interface->dev, "Unable to register polled input device."); goto err_free_buffer; } /* register the video master device */ snprintf(sur40->v4l2.name, sizeof(sur40->v4l2.name), "%s", DRIVER_LONG); error = v4l2_device_register(sur40->dev, &sur40->v4l2); if (error) { dev_err(&interface->dev, "Unable to register video master device."); goto err_unreg_v4l2; } /* initialize the lock and subdevice */ sur40->queue = sur40_queue; sur40->queue.drv_priv = sur40; sur40->queue.lock = &sur40->lock; sur40->queue.dev = sur40->dev; /* initialize the queue */ error = vb2_queue_init(&sur40->queue); if (error) goto err_unreg_v4l2; sur40->pix_fmt = sur40_pix_format[0]; sur40->vdev = sur40_video_device; sur40->vdev.v4l2_dev = &sur40->v4l2; sur40->vdev.lock = &sur40->lock; sur40->vdev.queue = &sur40->queue; video_set_drvdata(&sur40->vdev, sur40); /* initialize the control handler for 4 controls */ v4l2_ctrl_handler_init(&sur40->hdl, 4); sur40->v4l2.ctrl_handler = &sur40->hdl; sur40->vsvideo = (SUR40_CONTRAST_DEF << 4) | SUR40_GAIN_DEF; v4l2_ctrl_new_std(&sur40->hdl, &sur40_ctrl_ops, V4L2_CID_BRIGHTNESS, SUR40_BRIGHTNESS_MIN, SUR40_BRIGHTNESS_MAX, 1, clamp(brightness, (uint)SUR40_BRIGHTNESS_MIN, (uint)SUR40_BRIGHTNESS_MAX)); v4l2_ctrl_new_std(&sur40->hdl, &sur40_ctrl_ops, V4L2_CID_CONTRAST, SUR40_CONTRAST_MIN, SUR40_CONTRAST_MAX, 1, clamp(contrast, (uint)SUR40_CONTRAST_MIN, (uint)SUR40_CONTRAST_MAX)); v4l2_ctrl_new_std(&sur40->hdl, &sur40_ctrl_ops, V4L2_CID_GAIN, SUR40_GAIN_MIN, SUR40_GAIN_MAX, 1, clamp(gain, (uint)SUR40_GAIN_MIN, (uint)SUR40_GAIN_MAX)); v4l2_ctrl_new_std(&sur40->hdl, &sur40_ctrl_ops, V4L2_CID_BACKLIGHT_COMPENSATION, SUR40_BACKLIGHT_MIN, SUR40_BACKLIGHT_MAX, 1, SUR40_BACKLIGHT_DEF); v4l2_ctrl_handler_setup(&sur40->hdl); if (sur40->hdl.error) { dev_err(&interface->dev, "Unable to register video controls."); 
v4l2_ctrl_handler_free(&sur40->hdl); error = sur40->hdl.error; goto err_unreg_v4l2; } error = video_register_device(&sur40->vdev, VFL_TYPE_TOUCH, -1); if (error) { dev_err(&interface->dev, "Unable to register video subdevice."); goto err_unreg_video; } /* we can register the device now, as it is ready */ usb_set_intfdata(interface, sur40); dev_dbg(&interface->dev, "%s is now attached\n", DRIVER_DESC); return 0; err_unreg_video: video_unregister_device(&sur40->vdev); err_unreg_v4l2: v4l2_device_unregister(&sur40->v4l2); err_free_buffer: kfree(sur40->bulk_in_buffer); err_free_input: input_free_device(input); err_free_dev: kfree(sur40); return error; } /* Unregister device & clean up. */ static void sur40_disconnect(struct usb_interface *interface) { struct sur40_state *sur40 = usb_get_intfdata(interface); v4l2_ctrl_handler_free(&sur40->hdl); video_unregister_device(&sur40->vdev); v4l2_device_unregister(&sur40->v4l2); input_unregister_device(sur40->input); kfree(sur40->bulk_in_buffer); kfree(sur40); usb_set_intfdata(interface, NULL); dev_dbg(&interface->dev, "%s is now disconnected\n", DRIVER_DESC); } /* * Setup the constraints of the queue: besides setting the number of planes * per buffer and the size and allocation context of each plane, it also * checks if sufficient buffers have been allocated. Usually 3 is a good * minimum number: many DMA engines need a minimum of 2 buffers in the * queue and you need to have another available for userspace processing. */ static int sur40_queue_setup(struct vb2_queue *q, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct sur40_state *sur40 = vb2_get_drv_priv(q); unsigned int q_num_bufs = vb2_get_num_buffers(q); if (q_num_bufs + *nbuffers < 3) *nbuffers = 3 - q_num_bufs; if (*nplanes) return sizes[0] < sur40->pix_fmt.sizeimage ? 
-EINVAL : 0; *nplanes = 1; sizes[0] = sur40->pix_fmt.sizeimage; return 0; } /* * Prepare the buffer for queueing to the DMA engine: check and set the * payload size. */ static int sur40_buffer_prepare(struct vb2_buffer *vb) { struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue); unsigned long size = sur40->pix_fmt.sizeimage; if (vb2_plane_size(vb, 0) < size) { dev_err(&sur40->usbdev->dev, "buffer too small (%lu < %lu)\n", vb2_plane_size(vb, 0), size); return -EINVAL; } vb2_set_plane_payload(vb, 0, size); return 0; } /* * Queue this buffer to the DMA engine. */ static void sur40_buffer_queue(struct vb2_buffer *vb) { struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue); struct sur40_buffer *buf = (struct sur40_buffer *)vb; spin_lock(&sur40->qlock); list_add_tail(&buf->list, &sur40->buf_list); spin_unlock(&sur40->qlock); } static void return_all_buffers(struct sur40_state *sur40, enum vb2_buffer_state state) { struct sur40_buffer *buf, *node; spin_lock(&sur40->qlock); list_for_each_entry_safe(buf, node, &sur40->buf_list, list) { vb2_buffer_done(&buf->vb.vb2_buf, state); list_del(&buf->list); } spin_unlock(&sur40->qlock); } /* * Start streaming. First check if the minimum number of buffers have been * queued. If not, then return -ENOBUFS and the vb2 framework will call * this function again the next time a buffer has been queued until enough * buffers are available to actually start the DMA engine. */ static int sur40_start_streaming(struct vb2_queue *vq, unsigned int count) { struct sur40_state *sur40 = vb2_get_drv_priv(vq); sur40->sequence = 0; return 0; } /* * Stop the DMA engine. Any remaining buffers in the DMA queue are dequeued * and passed on to the vb2 framework marked as STATE_ERROR. 
*/ static void sur40_stop_streaming(struct vb2_queue *vq) { struct sur40_state *sur40 = vb2_get_drv_priv(vq); vb2_wait_for_all_buffers(vq); sur40->sequence = -1; /* Release all active buffers */ return_all_buffers(sur40, VB2_BUF_STATE_ERROR); } /* V4L ioctl */ static int sur40_vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { struct sur40_state *sur40 = video_drvdata(file); strscpy(cap->driver, DRIVER_SHORT, sizeof(cap->driver)); strscpy(cap->card, DRIVER_LONG, sizeof(cap->card)); usb_make_path(sur40->usbdev, cap->bus_info, sizeof(cap->bus_info)); return 0; } static int sur40_vidioc_enum_input(struct file *file, void *priv, struct v4l2_input *i) { if (i->index != 0) return -EINVAL; i->type = V4L2_INPUT_TYPE_TOUCH; i->std = V4L2_STD_UNKNOWN; strscpy(i->name, "In-Cell Sensor", sizeof(i->name)); i->capabilities = 0; return 0; } static int sur40_vidioc_s_input(struct file *file, void *priv, unsigned int i) { return (i == 0) ? 0 : -EINVAL; } static int sur40_vidioc_g_input(struct file *file, void *priv, unsigned int *i) { *i = 0; return 0; } static int sur40_vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f) { switch (f->fmt.pix.pixelformat) { case V4L2_PIX_FMT_GREY: f->fmt.pix = sur40_pix_format[1]; break; default: f->fmt.pix = sur40_pix_format[0]; break; } return 0; } static int sur40_vidioc_s_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct sur40_state *sur40 = video_drvdata(file); switch (f->fmt.pix.pixelformat) { case V4L2_PIX_FMT_GREY: sur40->pix_fmt = sur40_pix_format[1]; break; default: sur40->pix_fmt = sur40_pix_format[0]; break; } f->fmt.pix = sur40->pix_fmt; return 0; } static int sur40_vidioc_g_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct sur40_state *sur40 = video_drvdata(file); f->fmt.pix = sur40->pix_fmt; return 0; } static int sur40_s_ctrl(struct v4l2_ctrl *ctrl) { struct sur40_state *sur40 = container_of(ctrl->handler, struct sur40_state, hdl); u8 value = 
sur40->vsvideo; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: sur40_set_irlevel(sur40, ctrl->val); break; case V4L2_CID_CONTRAST: value = (value & 0x0f) | (ctrl->val << 4); sur40_set_vsvideo(sur40, value); break; case V4L2_CID_GAIN: value = (value & 0xf0) | (ctrl->val); sur40_set_vsvideo(sur40, value); break; case V4L2_CID_BACKLIGHT_COMPENSATION: sur40_set_preprocessor(sur40, ctrl->val); break; } return 0; } static int sur40_ioctl_parm(struct file *file, void *priv, struct v4l2_streamparm *p) { if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; p->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; p->parm.capture.timeperframe.numerator = 1; p->parm.capture.timeperframe.denominator = 60; p->parm.capture.readbuffers = 3; return 0; } static int sur40_vidioc_enum_fmt(struct file *file, void *priv, struct v4l2_fmtdesc *f) { if (f->index >= ARRAY_SIZE(sur40_pix_format)) return -EINVAL; f->pixelformat = sur40_pix_format[f->index].pixelformat; f->flags = 0; return 0; } static int sur40_vidioc_enum_framesizes(struct file *file, void *priv, struct v4l2_frmsizeenum *f) { struct sur40_state *sur40 = video_drvdata(file); if ((f->index != 0) || ((f->pixel_format != V4L2_TCH_FMT_TU08) && (f->pixel_format != V4L2_PIX_FMT_GREY))) return -EINVAL; f->type = V4L2_FRMSIZE_TYPE_DISCRETE; f->discrete.width = sur40->pix_fmt.width; f->discrete.height = sur40->pix_fmt.height; return 0; } static int sur40_vidioc_enum_frameintervals(struct file *file, void *priv, struct v4l2_frmivalenum *f) { struct sur40_state *sur40 = video_drvdata(file); if ((f->index > 0) || ((f->pixel_format != V4L2_TCH_FMT_TU08) && (f->pixel_format != V4L2_PIX_FMT_GREY)) || (f->width != sur40->pix_fmt.width) || (f->height != sur40->pix_fmt.height)) return -EINVAL; f->type = V4L2_FRMIVAL_TYPE_DISCRETE; f->discrete.denominator = 60; f->discrete.numerator = 1; return 0; } static const struct usb_device_id sur40_table[] = { { USB_DEVICE(ID_MICROSOFT, ID_SUR40) }, /* Samsung SUR40 */ { } /* terminating null entry */ 
}; MODULE_DEVICE_TABLE(usb, sur40_table); /* V4L2 structures */ static const struct vb2_ops sur40_queue_ops = { .queue_setup = sur40_queue_setup, .buf_prepare = sur40_buffer_prepare, .buf_queue = sur40_buffer_queue, .start_streaming = sur40_start_streaming, .stop_streaming = sur40_stop_streaming, }; static const struct vb2_queue sur40_queue = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, /* * VB2_USERPTR in currently not enabled: passing a user pointer to * dma-sg will result in segment sizes that are not a multiple of * 512 bytes, which is required by the host controller. */ .io_modes = VB2_MMAP | VB2_READ | VB2_DMABUF, .buf_struct_size = sizeof(struct sur40_buffer), .ops = &sur40_queue_ops, .mem_ops = &vb2_dma_sg_memops, .timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, .min_queued_buffers = 3, }; static const struct v4l2_file_operations sur40_video_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .unlocked_ioctl = video_ioctl2, .read = vb2_fop_read, .mmap = vb2_fop_mmap, .poll = vb2_fop_poll, }; static const struct v4l2_ioctl_ops sur40_video_ioctl_ops = { .vidioc_querycap = sur40_vidioc_querycap, .vidioc_enum_fmt_vid_cap = sur40_vidioc_enum_fmt, .vidioc_try_fmt_vid_cap = sur40_vidioc_try_fmt, .vidioc_s_fmt_vid_cap = sur40_vidioc_s_fmt, .vidioc_g_fmt_vid_cap = sur40_vidioc_g_fmt, .vidioc_enum_framesizes = sur40_vidioc_enum_framesizes, .vidioc_enum_frameintervals = sur40_vidioc_enum_frameintervals, .vidioc_g_parm = sur40_ioctl_parm, .vidioc_s_parm = sur40_ioctl_parm, .vidioc_enum_input = sur40_vidioc_enum_input, .vidioc_g_input = sur40_vidioc_g_input, .vidioc_s_input = sur40_vidioc_s_input, .vidioc_reqbufs = vb2_ioctl_reqbufs, .vidioc_create_bufs = vb2_ioctl_create_bufs, .vidioc_querybuf = vb2_ioctl_querybuf, .vidioc_qbuf = vb2_ioctl_qbuf, .vidioc_dqbuf = vb2_ioctl_dqbuf, .vidioc_expbuf = vb2_ioctl_expbuf, .vidioc_streamon = vb2_ioctl_streamon, .vidioc_streamoff = vb2_ioctl_streamoff, }; static const struct video_device 
sur40_video_device = { .name = DRIVER_LONG, .fops = &sur40_video_fops, .ioctl_ops = &sur40_video_ioctl_ops, .release = video_device_release_empty, .device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_TOUCH | V4L2_CAP_READWRITE | V4L2_CAP_STREAMING, }; /* USB-specific object needed to register this driver with the USB subsystem. */ static struct usb_driver sur40_driver = { .name = DRIVER_SHORT, .probe = sur40_probe, .disconnect = sur40_disconnect, .id_table = sur40_table, }; module_usb_driver(sur40_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 // SPDX-License-Identifier: GPL-2.0-only /* * VFIO core * * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 
* Author: Alex Williamson <alex.williamson@redhat.com> * * Derived from original vfio: * Copyright 2010 Cisco Systems, Inc. All rights reserved. * Author: Tom Lyon, pugs@cisco.com */ #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/iommu.h> #if IS_ENABLED(CONFIG_KVM) #include <linux/kvm_host.h> #endif #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/mutex.h> #include <linux/pci.h> #include <linux/pseudo_fs.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> #include <linux/pm_runtime.h> #include <linux/interval_tree.h> #include <linux/iova_bitmap.h> #include <linux/iommufd.h> #include "vfio.h" #define DRIVER_VERSION "0.3" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO - User Level meta-driver" #define VFIO_MAGIC 0x5646494f /* "VFIO" */ static struct vfio { struct class *device_class; struct ida device_ida; struct vfsmount *vfs_mount; int fs_count; } vfio; #ifdef CONFIG_VFIO_NOIOMMU bool vfio_noiommu __read_mostly; module_param_named(enable_unsafe_noiommu_mode, vfio_noiommu, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. 
(default: false)"); #endif static DEFINE_XARRAY(vfio_device_set_xa); int vfio_assign_device_set(struct vfio_device *device, void *set_id) { unsigned long idx = (unsigned long)set_id; struct vfio_device_set *new_dev_set; struct vfio_device_set *dev_set; if (WARN_ON(!set_id)) return -EINVAL; /* * Atomically acquire a singleton object in the xarray for this set_id */ xa_lock(&vfio_device_set_xa); dev_set = xa_load(&vfio_device_set_xa, idx); if (dev_set) goto found_get_ref; xa_unlock(&vfio_device_set_xa); new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); if (!new_dev_set) return -ENOMEM; mutex_init(&new_dev_set->lock); INIT_LIST_HEAD(&new_dev_set->device_list); new_dev_set->set_id = set_id; xa_lock(&vfio_device_set_xa); dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, GFP_KERNEL); if (!dev_set) { dev_set = new_dev_set; goto found_get_ref; } kfree(new_dev_set); if (xa_is_err(dev_set)) { xa_unlock(&vfio_device_set_xa); return xa_err(dev_set); } found_get_ref: dev_set->device_count++; xa_unlock(&vfio_device_set_xa); mutex_lock(&dev_set->lock); device->dev_set = dev_set; list_add_tail(&device->dev_set_list, &dev_set->device_list); mutex_unlock(&dev_set->lock); return 0; } EXPORT_SYMBOL_GPL(vfio_assign_device_set); static void vfio_release_device_set(struct vfio_device *device) { struct vfio_device_set *dev_set = device->dev_set; if (!dev_set) return; mutex_lock(&dev_set->lock); list_del(&device->dev_set_list); mutex_unlock(&dev_set->lock); xa_lock(&vfio_device_set_xa); if (!--dev_set->device_count) { __xa_erase(&vfio_device_set_xa, (unsigned long)dev_set->set_id); mutex_destroy(&dev_set->lock); kfree(dev_set); } xa_unlock(&vfio_device_set_xa); } unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) { struct vfio_device *cur; unsigned int open_count = 0; lockdep_assert_held(&dev_set->lock); list_for_each_entry(cur, &dev_set->device_list, dev_set_list) open_count += cur->open_count; return open_count; } 
EXPORT_SYMBOL_GPL(vfio_device_set_open_count); struct vfio_device * vfio_find_device_in_devset(struct vfio_device_set *dev_set, struct device *dev) { struct vfio_device *cur; lockdep_assert_held(&dev_set->lock); list_for_each_entry(cur, &dev_set->device_list, dev_set_list) if (cur->dev == dev) return cur; return NULL; } EXPORT_SYMBOL_GPL(vfio_find_device_in_devset); /* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ void vfio_device_put_registration(struct vfio_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } bool vfio_device_try_get_registration(struct vfio_device *device) { return refcount_inc_not_zero(&device->refcount); } /* * VFIO driver API */ /* Release helper called by vfio_put_device() */ static void vfio_device_release(struct device *dev) { struct vfio_device *device = container_of(dev, struct vfio_device, device); vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); if (device->ops->release) device->ops->release(device); iput(device->inode); simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); kvfree(device); } static int vfio_init_device(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops); /* * Allocate and initialize vfio_device so it can be registered to vfio * core. * * Drivers should use the wrapper vfio_alloc_device() for allocation. * @size is the size of the structure to be allocated, including any * private data used by the driver. * * Driver may provide an @init callback to cover device private data. * * Use vfio_put_device() to release the structure after success return. 
*/ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, const struct vfio_device_ops *ops) { struct vfio_device *device; int ret; if (WARN_ON(size < sizeof(struct vfio_device))) return ERR_PTR(-EINVAL); device = kvzalloc(size, GFP_KERNEL); if (!device) return ERR_PTR(-ENOMEM); ret = vfio_init_device(device, dev, ops); if (ret) goto out_free; return device; out_free: kvfree(device); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(_vfio_alloc_device); static int vfio_fs_init_fs_context(struct fs_context *fc) { return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; } static struct file_system_type vfio_fs_type = { .name = "vfio", .owner = THIS_MODULE, .init_fs_context = vfio_fs_init_fs_context, .kill_sb = kill_anon_super, }; static struct inode *vfio_fs_inode_new(void) { struct inode *inode; int ret; ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count); if (ret) return ERR_PTR(ret); inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb); if (IS_ERR(inode)) simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); return inode; } /* * Initialize a vfio_device so it can be registered to vfio core. 
*/ static int vfio_init_device(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops) { int ret; ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); if (ret < 0) { dev_dbg(dev, "Error to alloc index\n"); return ret; } device->index = ret; init_completion(&device->comp); device->dev = dev; device->ops = ops; device->inode = vfio_fs_inode_new(); if (IS_ERR(device->inode)) { ret = PTR_ERR(device->inode); goto out_inode; } if (ops->init) { ret = ops->init(device); if (ret) goto out_uninit; } device_initialize(&device->device); device->device.release = vfio_device_release; device->device.class = vfio.device_class; device->device.parent = device->dev; return 0; out_uninit: iput(device->inode); simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); out_inode: vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); return ret; } static int __vfio_register_dev(struct vfio_device *device, enum vfio_group_type type) { int ret; if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && (!device->ops->bind_iommufd || !device->ops->unbind_iommufd || !device->ops->attach_ioas || !device->ops->detach_ioas))) return -EINVAL; /* * If the driver doesn't specify a set then the device is added to a * singleton set just for itself. */ if (!device->dev_set) vfio_assign_device_set(device, device); ret = dev_set_name(&device->device, "vfio%d", device->index); if (ret) return ret; ret = vfio_device_set_group(device, type); if (ret) return ret; /* * VFIO always sets IOMMU_CACHE because we offer no way for userspace to * restore cache coherency. It has to be checked here because it is only * valid for cases where we are using iommu groups. 
*/ if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) && !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) { ret = -EINVAL; goto err_out; } ret = vfio_device_add(device); if (ret) goto err_out; /* Refcounting can't start until the driver calls register */ refcount_set(&device->refcount, 1); vfio_device_group_register(device); vfio_device_debugfs_init(device); return 0; err_out: vfio_device_remove_group(device); return ret; } int vfio_register_group_dev(struct vfio_device *device) { return __vfio_register_dev(device, VFIO_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_group_dev); /* * Register a virtual device without IOMMU backing. The user of this * device must not be able to directly trigger unmediated DMA. */ int vfio_register_emulated_iommu_dev(struct vfio_device *device) { return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); /* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ void vfio_unregister_group_dev(struct vfio_device *device) { unsigned int i = 0; bool interrupted = false; long rc; /* * Prevent new device opened by userspace via the * VFIO_GROUP_GET_DEVICE_FD in the group path. */ vfio_device_group_unregister(device); /* * Balances vfio_device_add() in register path, also prevents * new device opened by userspace in the cdev path. 
*/ vfio_device_del(device); vfio_device_put_registration(device); rc = try_wait_for_completion(&device->comp); while (rc <= 0) { if (device->ops->request) device->ops->request(device, i++); if (interrupted) { rc = wait_for_completion_timeout(&device->comp, HZ * 10); } else { rc = wait_for_completion_interruptible_timeout( &device->comp, HZ * 10); if (rc < 0) { interrupted = true; dev_warn(device->dev, "Device is currently in use, task" " \"%s\" (%d) " "blocked until device is released", current->comm, task_pid_nr(current)); } } } vfio_device_debugfs_exit(device); /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); #if IS_ENABLED(CONFIG_KVM) void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) { void (*pfn)(struct kvm *kvm); bool (*fn)(struct kvm *kvm); bool ret; lockdep_assert_held(&device->dev_set->lock); if (!kvm) return; pfn = symbol_get(kvm_put_kvm); if (WARN_ON(!pfn)) return; fn = symbol_get(kvm_get_kvm_safe); if (WARN_ON(!fn)) { symbol_put(kvm_put_kvm); return; } ret = fn(kvm); symbol_put(kvm_get_kvm_safe); if (!ret) { symbol_put(kvm_put_kvm); return; } device->put_kvm = pfn; device->kvm = kvm; } void vfio_device_put_kvm(struct vfio_device *device) { lockdep_assert_held(&device->dev_set->lock); if (!device->kvm) return; if (WARN_ON(!device->put_kvm)) goto clear; device->put_kvm(device->kvm); device->put_kvm = NULL; symbol_put(kvm_put_kvm); clear: device->kvm = NULL; } #endif /* true if the vfio_device has open_device() called but not close_device() */ static bool vfio_assert_device_open(struct vfio_device *device) { return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } struct vfio_device_file * vfio_allocate_device_file(struct vfio_device *device) { struct vfio_device_file *df; df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT); if (!df) return ERR_PTR(-ENOMEM); df->device = device; spin_lock_init(&df->kvm_ref_lock); return df; } static int 
vfio_df_device_first_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;
	int ret;

	lockdep_assert_held(&device->dev_set->lock);

	/* Hold the bound driver's module until vfio_df_device_last_close(). */
	if (!try_module_get(device->dev->driver->owner))
		return -ENODEV;

	/* Bind through iommufd when the file carries one, else use the group. */
	if (iommufd)
		ret = vfio_df_iommufd_bind(df);
	else
		ret = vfio_device_group_use_iommu(device);
	if (ret)
		goto err_module_put;

	/* The driver's open_device hook is optional. */
	if (device->ops->open_device) {
		ret = device->ops->open_device(device);
		if (ret)
			goto err_unuse_iommu;
	}
	return 0;

err_unuse_iommu:
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
err_module_put:
	module_put(device->dev->driver->owner);
	return ret;
}

/*
 * Undo vfio_df_device_first_open() in reverse order: driver close hook,
 * iommufd unbind or group unuse, then the module reference.
 * The caller must hold device->dev_set->lock.
 */
static void vfio_df_device_last_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	struct iommufd_ctx *iommufd = df->iommufd;

	lockdep_assert_held(&device->dev_set->lock);

	if (device->ops->close_device)
		device->ops->close_device(device);
	if (iommufd)
		vfio_df_iommufd_unbind(df);
	else
		vfio_device_group_unuse_iommu(device);
	module_put(device->dev->driver->owner);
}

/*
 * Account one more open of df->device, running the first-open sequence when
 * the count goes from 0 to 1.
 *
 * Returns 0 on success, -EINVAL when a non-group file tries to open an
 * already-open device, or the error from the first-open sequence (in which
 * case the count is restored). The caller must hold device->dev_set->lock.
 */
int vfio_df_open(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;
	int ret = 0;

	lockdep_assert_held(&device->dev_set->lock);

	/*
	 * Only the group path allows the device to be opened multiple
	 * times. The device cdev path doesn't have a secure way for it.
	 */
	if (device->open_count != 0 && !df->group)
		return -EINVAL;

	/* The count is raised before first_open runs and rolled back on error. */
	device->open_count++;
	if (device->open_count == 1) {
		ret = vfio_df_device_first_open(df);
		if (ret)
			device->open_count--;
	}

	return ret;
}

/*
 * Account one close of df->device, running the last-close sequence when the
 * count is about to drop from 1 to 0. Warns once and returns if the device
 * is not open. The caller must hold device->dev_set->lock.
 */
void vfio_df_close(struct vfio_device_file *df)
{
	struct vfio_device *device = df->device;

	lockdep_assert_held(&device->dev_set->lock);

	if (!vfio_assert_device_open(device))
		return;
	/* last_close runs while open_count is still 1. */
	if (device->open_count == 1)
		vfio_df_device_last_close(df);
	device->open_count--;
}

/*
 * Wrapper around pm_runtime_resume_and_get().
 * Return error code on failure or 0 on success.
*/ static inline int vfio_device_pm_runtime_get(struct vfio_device *device) { struct device *dev = device->dev; if (dev->driver && dev->driver->pm) { int ret; ret = pm_runtime_resume_and_get(dev); if (ret) { dev_info_ratelimited(dev, "vfio: runtime resume failed %d\n", ret); return -EIO; } } return 0; } /* * Wrapper around pm_runtime_put(). */ static inline void vfio_device_pm_runtime_put(struct vfio_device *device) { struct device *dev = device->dev; if (dev->driver && dev->driver->pm) pm_runtime_put(dev); } /* * VFIO Device fd */ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; if (df->group) vfio_df_group_close(df); else vfio_df_unbind_iommufd(df); vfio_device_put_registration(device); kfree(df); return 0; } /* * vfio_mig_get_next_state - Compute the next step in the FSM * @cur_fsm - The current state the device is in * @new_fsm - The target state to reach * @next_fsm - Pointer to the next step to get to new_fsm * * Return 0 upon success, otherwise -errno * Upon success the next step in the state progression between cur_fsm and * new_fsm will be set in next_fsm. * * This breaks down requests for combination transitions into smaller steps and * returns the next step to get to new_fsm. The function may need to be called * multiple times before reaching new_fsm. 
* */ int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm) { enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; /* * The coding in this table requires the driver to implement the * following FSM arcs: * RESUMING -> STOP * STOP -> RESUMING * STOP -> STOP_COPY * STOP_COPY -> STOP * * If P2P is supported then the driver must also implement these FSM * arcs: * RUNNING -> RUNNING_P2P * RUNNING_P2P -> RUNNING * RUNNING_P2P -> STOP * STOP -> RUNNING_P2P * * If precopy is supported then the driver must support these additional * FSM arcs: * RUNNING -> PRE_COPY * PRE_COPY -> RUNNING * PRE_COPY -> STOP_COPY * However, if precopy and P2P are supported together then the driver * must support these additional arcs beyond the P2P arcs above: * PRE_COPY -> RUNNING * PRE_COPY -> PRE_COPY_P2P * PRE_COPY_P2P -> PRE_COPY * PRE_COPY_P2P -> RUNNING_P2P * PRE_COPY_P2P -> STOP_COPY * RUNNING -> PRE_COPY * RUNNING_P2P -> PRE_COPY_P2P * * Without P2P and precopy the driver must implement: * RUNNING -> STOP * STOP -> RUNNING * * The coding will step through multiple states for some combination * transitions; if all optional features are supported, this means the * following ones: * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY * PRE_COPY -> RUNNING -> RUNNING_P2P * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING * PRE_COPY_P2P -> RUNNING_P2P -> STOP * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING * RESUMING -> STOP -> RUNNING_P2P * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P * RESUMING -> STOP -> RUNNING_P2P -> RUNNING * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * RESUMING -> STOP -> STOP_COPY * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P * RUNNING -> RUNNING_P2P -> STOP * RUNNING -> RUNNING_P2P -> STOP -> RESUMING * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY * 
RUNNING_P2P -> RUNNING -> PRE_COPY * RUNNING_P2P -> STOP -> RESUMING * RUNNING_P2P -> STOP -> STOP_COPY * STOP -> RUNNING_P2P -> PRE_COPY_P2P * STOP -> RUNNING_P2P -> RUNNING * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * STOP_COPY -> STOP -> RESUMING * STOP_COPY -> STOP -> RUNNING_P2P * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING * * The following transitions are blocked: * STOP_COPY -> PRE_COPY * STOP_COPY -> PRE_COPY_P2P */ static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RUNNING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_PRE_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, 
[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_STOP_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RESUMING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_RUNNING_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 
[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, [VFIO_DEVICE_STATE_ERROR] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, }; static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, [VFIO_DEVICE_STATE_ERROR] = ~0U, }; if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || (state_flags_table[cur_fsm] & device->migration_flags) != state_flags_table[cur_fsm])) return -EINVAL; if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || (state_flags_table[new_fsm] & device->migration_flags) != state_flags_table[new_fsm]) return -EINVAL; /* * Arcs touching optional and unsupported states are skipped over. The * driver will instead see an arc from the original state to the next * logical state, as per the above comment. */ *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; while ((state_flags_table[*next_fsm] & device->migration_flags) != state_flags_table[*next_fsm]) *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 
0 : -EINVAL; } EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); /* * Convert the drivers's struct file into a FD number and return it to userspace */ static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, struct vfio_device_feature_mig_state *mig) { int ret; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { ret = fd; goto out_fput; } mig->data_fd = fd; if (copy_to_user(arg, mig, sizeof(*mig))) { ret = -EFAULT; goto out_put_unused; } fd_install(fd, filp); return 0; out_put_unused: put_unused_fd(fd); out_fput: fput(filp); return ret; } static int vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { size_t minsz = offsetofend(struct vfio_device_feature_mig_state, data_fd); struct vfio_device_feature_mig_state mig; struct file *filp = NULL; int ret; if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_GET, sizeof(mig)); if (ret != 1) return ret; if (copy_from_user(&mig, arg, minsz)) return -EFAULT; if (flags & VFIO_DEVICE_FEATURE_GET) { enum vfio_device_mig_state curr_state; ret = device->mig_ops->migration_get_state(device, &curr_state); if (ret) return ret; mig.device_state = curr_state; goto out_copy; } /* Handle the VFIO_DEVICE_FEATURE_SET */ filp = device->mig_ops->migration_set_state(device, mig.device_state); if (IS_ERR(filp) || !filp) goto out_copy; return vfio_ioct_mig_return_fd(filp, arg, &mig); out_copy: mig.data_fd = -1; if (copy_to_user(arg, &mig, sizeof(mig))) return -EFAULT; if (IS_ERR(filp)) return PTR_ERR(filp); return 0; } static int vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { struct vfio_device_feature_mig_data_size data_size = {}; unsigned long stop_copy_length; int ret; if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, sizeof(data_size)); if (ret != 1) return ret; ret = 
device->mig_ops->migration_get_data_size(device, &stop_copy_length);
	if (ret)
		return ret;

	/* Copy the driver-reported length into the zero-initialised reply. */
	data_size.stop_copy_length = stop_copy_length;
	if (copy_to_user(arg, &data_size, sizeof(data_size)))
		return -EFAULT;

	return 0;
}

/*
 * Handle VFIO_DEVICE_FEATURE_MIGRATION (GET only): report
 * device->migration_flags to userspace.
 *
 * Returns -ENOTTY when the device has no mig_ops, -EFAULT when the copy-out
 * fails, or whatever vfio_check_feature() returns when it is not 1.
 */
static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

/*
 * vfio_combine_iova_ranges - Reduce the number of ranges in an interval tree
 * @root: interval tree holding the ranges
 * @cur_nodes: number of nodes currently in @root
 * @req_nodes: number of nodes wanted
 *
 * When @req_nodes is 1, every node but the first is removed and the first
 * node's ->last is extended to the ->last of the final node visited.
 * Otherwise, while @cur_nodes exceeds @req_nodes, the adjacent pair with
 * the smallest gap is merged: the earlier node takes the later node's
 * ->last and the later node is removed from the tree.
 */
void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
			      u32 req_nodes)
{
	struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
	unsigned long min_gap, curr_gap;

	/* Special shortcut when a single range is required */
	if (req_nodes == 1) {
		unsigned long last;

		comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);

		/* Empty list */
		if (WARN_ON_ONCE(!comb_start))
			return;

		curr = comb_start;
		while (curr) {
			last = curr->last;
			prev = curr;
			/* Fetch the successor before prev may be removed. */
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
			if (prev != comb_start)
				interval_tree_remove(prev, root);
		}
		comb_start->last = last;
		return;
	}

	/* Combine ranges which have the smallest gap */
	while (cur_nodes > req_nodes) {
		prev = NULL;
		min_gap = ULONG_MAX;
		curr = interval_tree_iter_first(root, 0, ULONG_MAX);
		while (curr) {
			if (prev) {
				curr_gap = curr->start - prev->last;
				if (curr_gap < min_gap) {
					min_gap = curr_gap;
					comb_start = prev;
					comb_end = curr;
				}
			}
			prev = curr;
			curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
		}

		/* Empty list or no nodes to combine */
		if (WARN_ON_ONCE(min_gap == ULONG_MAX))
			break;

		/* comb_start/comb_end are set whenever min_gap was lowered. */
		comb_start->last = comb_end->last;
		interval_tree_remove(comb_end, root);
		cur_nodes--;
	}
}
EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);

/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
	(PAGE_SIZE / sizeof(struct
vfio_device_feature_dma_logging_range))

/*
 * Handle VFIO_DEVICE_FEATURE_DMA_LOGGING_START (SET only).
 *
 * Copies the control structure and up to LOG_MAX_RANGES user ranges, builds
 * an interval tree from them and hands it to log_ops->log_start(). The
 * control structure is then copied back to userspace; if that copy fails,
 * logging is stopped again.
 *
 * Returns 0 on success, -ENOTTY without log_ops, -EINVAL for zero ranges,
 * misaligned or overlapping ranges, -E2BIG for too many ranges, -EOVERFLOW
 * when a range end overflows or exceeds ULONG_MAX, -ENOMEM or -EFAULT.
 */
static int
vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
					u32 flags, void __user *arg,
					size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_control,
			    ranges);
	struct vfio_device_feature_dma_logging_range __user *ranges;
	struct vfio_device_feature_dma_logging_control control;
	struct vfio_device_feature_dma_logging_range range;
	struct rb_root_cached root = RB_ROOT_CACHED;
	struct interval_tree_node *nodes;
	u64 iova_end;
	u32 nnodes;
	int i, ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET,
				 sizeof(control));
	if (ret != 1)
		return ret;

	if (copy_from_user(&control, arg, minsz))
		return -EFAULT;

	nnodes = control.num_ranges;
	if (!nnodes)
		return -EINVAL;

	if (nnodes > LOG_MAX_RANGES)
		return -E2BIG;

	ranges = u64_to_user_ptr(control.ranges);
	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
			      GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;

	for (i = 0; i < nnodes; i++) {
		if (copy_from_user(&range, &ranges[i], sizeof(range))) {
			ret = -EFAULT;
			goto end;
		}
		/*
		 * NOTE(review): control.page_size is used as the alignment
		 * here without being validated in this function — confirm
		 * whether drivers are expected to check it.
		 */
		if (!IS_ALIGNED(range.iova, control.page_size) ||
		    !IS_ALIGNED(range.length, control.page_size)) {
			ret = -EINVAL;
			goto end;
		}

		if (check_add_overflow(range.iova, range.length, &iova_end) ||
		    iova_end > ULONG_MAX) {
			ret = -EOVERFLOW;
			goto end;
		}

		/*
		 * NOTE(review): a zero range.length is not rejected, in which
		 * case ->last becomes range.iova - 1 — confirm this is
		 * intended.
		 */
		nodes[i].start = range.iova;
		nodes[i].last = range.iova + range.length - 1;
		if (interval_tree_iter_first(&root, nodes[i].start,
					     nodes[i].last)) {
			/* Range overlapping */
			ret = -EINVAL;
			goto end;
		}
		interval_tree_insert(nodes + i, &root);
	}

	ret = device->log_ops->log_start(device, &root, nnodes,
					 &control.page_size);
	if (ret)
		goto end;

	/* control.page_size was passed by pointer to log_start() above. */
	if (copy_to_user(arg, &control, sizeof(control))) {
		ret = -EFAULT;
		device->log_ops->log_stop(device);
	}

end:
	kfree(nodes);
	return ret;
}

/*
 * Handle VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP (SET only, no payload).
 */
static int
vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
				       u32 flags, void __user *arg,
				       size_t argsz)
{
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret =
vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET, 0);
	if (ret != 1)
		return ret;

	return device->log_ops->log_stop(device);
}

/*
 * Callback passed to iova_bitmap_for_each(): forwards one
 * (@iova, @length) chunk to the driver's log_read_and_clear op.
 * @opaque is the struct vfio_device supplied by the caller.
 */
static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
					  unsigned long iova, size_t length,
					  void *opaque)
{
	struct vfio_device *device = opaque;

	return device->log_ops->log_read_and_clear(device, iova, length, iter);
}

/*
 * Handle VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT (GET only).
 *
 * Validates the requested window, allocates an iova_bitmap over the user
 * bitmap and runs vfio_device_log_read_and_clear() across it.
 *
 * Returns -ENOTTY without log_ops, -EINVAL when page_size is below SZ_4K or
 * not a power of two, -EOVERFLOW when iova + length overflows or exceeds
 * ULONG_MAX, -EFAULT on copy-in failure, or the bitmap/driver error.
 */
static int
vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
					 u32 flags, void __user *arg,
					 size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_dma_logging_report,
			    bitmap);
	struct vfio_device_feature_dma_logging_report report;
	struct iova_bitmap *iter;
	u64 iova_end;
	int ret;

	if (!device->log_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(report));
	if (ret != 1)
		return ret;

	if (copy_from_user(&report, arg, minsz))
		return -EFAULT;

	if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
		return -EINVAL;

	if (check_add_overflow(report.iova, report.length, &iova_end) ||
	    iova_end > ULONG_MAX)
		return -EOVERFLOW;

	iter = iova_bitmap_alloc(report.iova, report.length,
				 report.page_size,
				 u64_to_user_ptr(report.bitmap));
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = iova_bitmap_for_each(iter, device,
				   vfio_device_log_read_and_clear);

	iova_bitmap_free(iter);
	return ret;
}

/*
 * Handle the VFIO_DEVICE_FEATURE ioctl: validate the header and dispatch on
 * the feature index. Features not handled here go to the driver's optional
 * device_feature op. Each handler receives the payload pointer (arg->data)
 * and the payload size (argsz minus the header).
 *
 * Returns -EFAULT on copy-in failure and -EINVAL for a short argsz, unknown
 * flag bits, GET and SET together without PROBE, or an unhandled feature
 * with no driver op.
 */
static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
		return vfio_ioctl_device_feature_logging_start(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
		return vfio_ioctl_device_feature_logging_stop(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
		return vfio_ioctl_device_feature_logging_report(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
		return vfio_ioctl_device_feature_migration_data_size(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}

/*
 * unlocked_ioctl handler for VFIO device files.
 *
 * VFIO_DEVICE_BIND_IOMMUFD is handled before the access_granted check.
 * Every other command requires access_granted and runs between
 * vfio_device_pm_runtime_get() and vfio_device_pm_runtime_put().
 */
static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device_file *df = filep->private_data;
	struct vfio_device *device = df->device;
	void __user *uptr = (void __user *)arg;
	int ret;

	if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
		return vfio_df_ioctl_bind_iommufd(df, uptr);

	/* Paired with smp_store_release() following vfio_df_open() */
	if (!smp_load_acquire(&df->access_granted))
		return -EINVAL;

	ret = vfio_device_pm_runtime_get(device);
	if (ret)
		return ret;

	/* cdev only ioctls */
	if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
		switch (cmd) {
		case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_attach_pt(df, uptr);
			goto out;

		case VFIO_DEVICE_DETACH_IOMMUFD_PT:
			ret = vfio_df_ioctl_detach_pt(df, uptr);
			goto out;
		}
	}

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		ret = vfio_ioctl_device_feature(device, uptr);
break; default: if (unlikely(!device->ops->ioctl)) ret = -EINVAL; else ret = device->ops->ioctl(device, cmd, arg); break; } out: vfio_device_pm_runtime_put(device); return ret; } static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->read)) return -EINVAL; return device->ops->read(device, buf, count, ppos); } static ssize_t vfio_device_fops_write(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->write)) return -EINVAL; return device->ops->write(device, buf, count, ppos); } static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; if (unlikely(!device->ops->mmap)) return -EINVAL; return device->ops->mmap(device, vma); } #ifdef CONFIG_PROC_FS static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep) { char *path; struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; path = kobject_get_path(&device->dev->kobj, GFP_KERNEL); if (!path) return; seq_printf(m, "vfio-device-syspath: /sys%s\n", path); kfree(path); } #endif const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .open = vfio_device_fops_cdev_open, .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, 
.unlocked_ioctl = vfio_device_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, #ifdef CONFIG_PROC_FS .show_fdinfo = vfio_device_show_fdinfo, #endif }; static struct vfio_device *vfio_device_from_file(struct file *file) { struct vfio_device_file *df = file->private_data; if (file->f_op != &vfio_device_fops) return NULL; return df->device; } /** * vfio_file_is_valid - True if the file is valid vfio file * @file: VFIO group file or VFIO device file */ bool vfio_file_is_valid(struct file *file) { return vfio_group_from_file(file) || vfio_device_from_file(file); } EXPORT_SYMBOL_GPL(vfio_file_is_valid); /** * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file * is always CPU cache coherent * @file: VFIO group file or VFIO device file * * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop * bit in DMA transactions. A return of false indicates that the user has * rights to access additional instructions such as wbinvd on x86. */ bool vfio_file_enforced_coherent(struct file *file) { struct vfio_device *device; struct vfio_group *group; group = vfio_group_from_file(file); if (group) return vfio_group_enforced_coherent(group); device = vfio_device_from_file(file); if (device) return device_iommu_capable(device->dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); return true; } EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm) { struct vfio_device_file *df = file->private_data; /* * The kvm is first recorded in the vfio_device_file, and will * be propagated to vfio_device::kvm when the file is bound to * iommufd successfully in the vfio device cdev path. 
*/ spin_lock(&df->kvm_ref_lock); df->kvm = kvm; spin_unlock(&df->kvm_ref_lock); } /** * vfio_file_set_kvm - Link a kvm with VFIO drivers * @file: VFIO group file or VFIO device file * @kvm: KVM to link * * When a VFIO device is first opened the KVM will be available in * device->kvm if one was associated with the file. */ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) { struct vfio_group *group; group = vfio_group_from_file(file); if (group) vfio_group_set_kvm(group, kvm); if (vfio_device_from_file(file)) vfio_device_file_set_kvm(file, kvm); } EXPORT_SYMBOL_GPL(vfio_file_set_kvm); /* * Sub-module support */ /* * Helper for managing a buffer of info chain capabilities, allocate or * reallocate a buffer with additional @size, filling in @id and @version * of the capability. A pointer to the new capability is returned. * * NB. The chain is based at the head of the buffer, so new entries are * added to the tail, vfio_info_cap_shift() should be called to fixup the * next offsets prior to copying to the user buffer. 
*/ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, size_t size, u16 id, u16 version) { void *buf; struct vfio_info_cap_header *header, *tmp; /* Ensure that the next capability struct will be aligned */ size = ALIGN(size, sizeof(u64)); buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); if (!buf) { kfree(caps->buf); caps->buf = NULL; caps->size = 0; return ERR_PTR(-ENOMEM); } caps->buf = buf; header = buf + caps->size; /* Eventually copied to user buffer, zero */ memset(header, 0, size); header->id = id; header->version = version; /* Add to the end of the capability chain */ for (tmp = buf; tmp->next; tmp = buf + tmp->next) ; /* nothing */ tmp->next = caps->size; caps->size += size; return header; } EXPORT_SYMBOL_GPL(vfio_info_cap_add); void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) { struct vfio_info_cap_header *tmp; void *buf = (void *)caps->buf; /* Capability structs should start with proper alignment */ WARN_ON(!IS_ALIGNED(offset, sizeof(u64))); for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) tmp->next += offset; } EXPORT_SYMBOL(vfio_info_cap_shift); int vfio_info_add_capability(struct vfio_info_cap *caps, struct vfio_info_cap_header *cap, size_t size) { struct vfio_info_cap_header *header; header = vfio_info_cap_add(caps, size, cap->id, cap->version); if (IS_ERR(header)) return PTR_ERR(header); memcpy(header + 1, cap + 1, size - sizeof(*header)); return 0; } EXPORT_SYMBOL(vfio_info_add_capability); int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, int max_irq_type, size_t *data_size) { unsigned long minsz; size_t size; minsz = offsetofend(struct vfio_irq_set, count); if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || (hdr->count >= (U32_MAX - hdr->start)) || (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | VFIO_IRQ_SET_ACTION_TYPE_MASK))) return -EINVAL; if (data_size) *data_size = 0; if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) return 
-EINVAL; switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { case VFIO_IRQ_SET_DATA_NONE: size = 0; break; case VFIO_IRQ_SET_DATA_BOOL: size = sizeof(uint8_t); break; case VFIO_IRQ_SET_DATA_EVENTFD: size = sizeof(int32_t); break; default: return -EINVAL; } if (size) { if (hdr->argsz - minsz < hdr->count * size) return -EINVAL; if (!data_size) return -EINVAL; *data_size = hdr->count * size; } return 0; } EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* * Pin contiguous user pages and return their associated host pages for local * domain only. * @device [in] : device * @iova [in] : starting IOVA of user pages to be pinned. * @npage [in] : count of pages to be pinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @prot [in] : protection flags * @pages[out] : array of host pages * Return error or number of pages pinned. * * A driver may only call this function if the vfio_device was created * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). */ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, struct page **pages) { /* group->container cannot change while a vfio device is open */ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) return -EINVAL; if (!device->ops->dma_unmap) return -EINVAL; if (vfio_device_has_container(device)) return vfio_device_container_pin_pages(device, iova, npage, prot, pages); if (device->iommufd_access) { int ret; if (iova > ULONG_MAX) return -EINVAL; /* * VFIO ignores the sub page offset, npages is from the start of * a PAGE_SIZE chunk of IOVA. The caller is expected to recover * the sub page offset by doing: * pages[0] + (iova % PAGE_SIZE) */ ret = iommufd_access_pin_pages( device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), npage * PAGE_SIZE, pages, (prot & IOMMU_WRITE) ? 
IOMMUFD_ACCESS_RW_WRITE : 0); if (ret) return ret; return npage; } return -EINVAL; } EXPORT_SYMBOL(vfio_pin_pages); /* * Unpin contiguous host pages for local domain only. * @device [in] : device * @iova [in] : starting address of user pages to be unpinned. * @npage [in] : count of pages to be unpinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. */ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) { if (WARN_ON(!vfio_assert_device_open(device))) return; if (WARN_ON(!device->ops->dma_unmap)) return; if (vfio_device_has_container(device)) { vfio_device_container_unpin_pages(device, iova, npage); return; } if (device->iommufd_access) { if (WARN_ON(iova > ULONG_MAX)) return; iommufd_access_unpin_pages(device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), npage * PAGE_SIZE); return; } } EXPORT_SYMBOL(vfio_unpin_pages); /* * This interface allows the CPUs to perform some sort of virtual DMA on * behalf of the device. * * CPUs read/write from/into a range of IOVAs pointing to user space memory * into/from a kernel buffer. * * As the read/write of user space memory is conducted via the CPUs and is * not a real device DMA, it is not necessary to pin the user space memory. * * @device [in] : VFIO device * @iova [in] : base IOVA of a user space buffer * @data [in] : pointer to kernel buffer * @len [in] : kernel buffer length * @write : indicate read or write * Return error code on failure or 0 on success. 
*/ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write) { if (!data || len <= 0 || !vfio_assert_device_open(device)) return -EINVAL; if (vfio_device_has_container(device)) return vfio_device_container_dma_rw(device, iova, data, len, write); if (device->iommufd_access) { unsigned int flags = 0; if (iova > ULONG_MAX) return -EINVAL; /* VFIO historically tries to auto-detect a kthread */ if (!current->mm) flags |= IOMMUFD_ACCESS_RW_KTHREAD; if (write) flags |= IOMMUFD_ACCESS_RW_WRITE; return iommufd_access_rw(device->iommufd_access, iova, data, len, flags); } return -EINVAL; } EXPORT_SYMBOL(vfio_dma_rw); /* * Module/class support */ static int __init vfio_init(void) { int ret; ida_init(&vfio.device_ida); ret = vfio_group_init(); if (ret) return ret; ret = vfio_virqfd_init(); if (ret) goto err_virqfd; /* /sys/class/vfio-dev/vfioX */ vfio.device_class = class_create("vfio-dev"); if (IS_ERR(vfio.device_class)) { ret = PTR_ERR(vfio.device_class); goto err_dev_class; } ret = vfio_cdev_init(vfio.device_class); if (ret) goto err_alloc_dev_chrdev; vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; err_alloc_dev_chrdev: class_destroy(vfio.device_class); vfio.device_class = NULL; err_dev_class: vfio_virqfd_exit(); err_virqfd: vfio_group_cleanup(); return ret; } static void __exit vfio_cleanup(void) { vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); vfio_cdev_cleanup(); class_destroy(vfio.device_class); vfio.device_class = NULL; vfio_virqfd_exit(); vfio_group_cleanup(); xa_destroy(&vfio_device_set_xa); } module_init(vfio_init); module_exit(vfio_cleanup); MODULE_IMPORT_NS("IOMMUFD"); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 1 2 1 2 2 1 3 3 1 1 2 3 22 5 5 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 4 3 1 4 2 11 10 9 1 2 1 1 210 210 2 2 2 2 2 3 1 2 1 2 3 2 2 2 2 2 2 1 2 2 4 4 4 1 4 2 1 1 1 1 1 1 1 1 3 3 2 208 210 209 11 3 11 209 12 11 1 12 4 209 210 206 209 208 11 11 3 9 209 1 1 1 1 1 1 9 11 10 10 3 3 3 1 5 5 4 3 4 204 205 204 204 204 8 203 222 118 86 205 204 15 204 205 206 204 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 
926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS emulation layer for the mixer interface * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/string.h> #include <linux/module.h> #include <linux/compat.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/control.h> #include <sound/info.h> #include <sound/mixer_oss.h> #include <linux/soundcard.h> #define OSS_ALSAEMULVER _SIOR ('M', 249, int) MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Mixer OSS emulation for ALSA."); MODULE_LICENSE("GPL"); MODULE_ALIAS_SNDRV_MINOR(SNDRV_MINOR_OSS_MIXER); static int snd_mixer_oss_open(struct inode *inode, struct file *file) { struct snd_card *card; struct snd_mixer_oss_file *fmixer; int err; err = nonseekable_open(inode, file); if (err < 0) return err; card = snd_lookup_oss_minor_data(iminor(inode), SNDRV_OSS_DEVICE_TYPE_MIXER); if (card == NULL) return -ENODEV; if (card->mixer_oss == NULL) { snd_card_unref(card); return -ENODEV; } err = snd_card_file_add(card, file); if (err < 0) { snd_card_unref(card); return err; } fmixer = kzalloc(sizeof(*fmixer), GFP_KERNEL); if (fmixer == NULL) { snd_card_file_remove(card, file); snd_card_unref(card); return -ENOMEM; } fmixer->card = card; fmixer->mixer = card->mixer_oss; file->private_data = fmixer; if (!try_module_get(card->module)) { kfree(fmixer); snd_card_file_remove(card, file); snd_card_unref(card); return -EFAULT; } snd_card_unref(card); return 0; } static int snd_mixer_oss_release(struct inode *inode, struct file *file) { struct snd_mixer_oss_file *fmixer; if (file->private_data) { fmixer = file->private_data; module_put(fmixer->card->module); snd_card_file_remove(fmixer->card, file); kfree(fmixer); 
} return 0; } static int snd_mixer_oss_info(struct snd_mixer_oss_file *fmixer, mixer_info __user *_info) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct mixer_info info; memset(&info, 0, sizeof(info)); strscpy(info.id, mixer && mixer->id[0] ? mixer->id : card->driver, sizeof(info.id)); strscpy(info.name, mixer && mixer->name[0] ? mixer->name : card->mixername, sizeof(info.name)); info.modify_counter = card->mixer_oss_change_count; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_mixer_oss_info_obsolete(struct snd_mixer_oss_file *fmixer, _old_mixer_info __user *_info) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; _old_mixer_info info; memset(&info, 0, sizeof(info)); strscpy(info.id, mixer && mixer->id[0] ? mixer->id : card->driver, sizeof(info.id)); strscpy(info.name, mixer && mixer->name[0] ? mixer->name : card->mixername, sizeof(info.name)); if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_mixer_oss_caps(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; if (mixer->get_recsrc && mixer->put_recsrc) result |= SOUND_CAP_EXCL_INPUT; return result; } static int snd_mixer_oss_devmask(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, chn; if (mixer == NULL) return -EIO; guard(mutex)(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume || pslot->put_recsrc) result |= 1 << chn; } return result; } static int snd_mixer_oss_stereodevs(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, chn; if (mixer == NULL) return -EIO; guard(mutex)(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if 
(pslot->put_volume && pslot->stereo) result |= 1 << chn; } return result; } static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; guard(mutex)(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ result = mixer->mask_recsrc; } else { struct snd_mixer_oss_slot *pslot; int chn; for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_recsrc) result |= 1 << chn; } } return result; } static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) { struct snd_mixer_oss *mixer = fmixer->mixer; int result = 0; if (mixer == NULL) return -EIO; guard(mutex)(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ unsigned int index; result = mixer->get_recsrc(fmixer, &index); if (result < 0) return result; result = 1 << index; } else { struct snd_mixer_oss_slot *pslot; int chn; for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->get_recsrc) { int active = 0; pslot->get_recsrc(fmixer, pslot, &active); if (active) result |= 1 << chn; } } } mixer->oss_recsrc = result; return result; } static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsrc) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int chn, active; unsigned int index; int result = 0; if (mixer == NULL) return -EIO; guard(mutex)(&mixer->reg_mutex); if (mixer->get_recsrc && mixer->put_recsrc) { /* exclusive input */ if (recsrc & ~mixer->oss_recsrc) recsrc &= ~mixer->oss_recsrc; mixer->put_recsrc(fmixer, ffz(~recsrc)); mixer->get_recsrc(fmixer, &index); result = 1 << index; } for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_recsrc) { active = (recsrc & (1 << chn)) ? 1 : 0; pslot->put_recsrc(fmixer, pslot, active); } } if (! 
result) { for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->get_recsrc) { active = 0; pslot->get_recsrc(fmixer, pslot, &active); if (active) result |= 1 << chn; } } } return result; } static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, left, right; if (mixer == NULL || slot > 30) return -EIO; guard(mutex)(&mixer->reg_mutex); pslot = &mixer->slots[slot]; left = pslot->volume[0]; right = pslot->volume[1]; if (pslot->get_volume) result = pslot->get_volume(fmixer, pslot, &left, &right); if (!pslot->stereo) right = left; if (snd_BUG_ON(left < 0 || left > 100)) return -EIO; if (snd_BUG_ON(right < 0 || right > 100)) return -EIO; if (result >= 0) { pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); } return result; } static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, int slot, int volume) { struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_mixer_oss_slot *pslot; int result = 0, left = volume & 0xff, right = (volume >> 8) & 0xff; if (mixer == NULL || slot > 30) return -EIO; guard(mutex)(&mixer->reg_mutex); pslot = &mixer->slots[slot]; if (left > 100) left = 100; if (right > 100) right = 100; if (!pslot->stereo) right = left; if (pslot->put_volume) result = pslot->put_volume(fmixer, pslot, left, right); if (result < 0) return result; pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); return result; } static int snd_mixer_oss_ioctl1(struct snd_mixer_oss_file *fmixer, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; int __user *p = argp; int tmp; if (snd_BUG_ON(!fmixer)) return -ENXIO; if (((cmd >> 8) & 0xff) == 'M') { switch (cmd) { case SOUND_MIXER_INFO: return snd_mixer_oss_info(fmixer, argp); case SOUND_OLD_MIXER_INFO: return snd_mixer_oss_info_obsolete(fmixer, argp); case 
SOUND_MIXER_WRITE_RECSRC: if (get_user(tmp, p)) return -EFAULT; tmp = snd_mixer_oss_set_recsrc(fmixer, tmp); if (tmp < 0) return tmp; return put_user(tmp, p); case OSS_GETVERSION: return put_user(SNDRV_OSS_VERSION, p); case OSS_ALSAEMULVER: return put_user(1, p); case SOUND_MIXER_READ_DEVMASK: tmp = snd_mixer_oss_devmask(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_STEREODEVS: tmp = snd_mixer_oss_stereodevs(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_RECMASK: tmp = snd_mixer_oss_recmask(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_CAPS: tmp = snd_mixer_oss_caps(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); case SOUND_MIXER_READ_RECSRC: tmp = snd_mixer_oss_get_recsrc(fmixer); if (tmp < 0) return tmp; return put_user(tmp, p); } } if (cmd & SIOC_IN) { if (get_user(tmp, p)) return -EFAULT; tmp = snd_mixer_oss_set_volume(fmixer, cmd & 0xff, tmp); if (tmp < 0) return tmp; return put_user(tmp, p); } else if (cmd & SIOC_OUT) { tmp = snd_mixer_oss_get_volume(fmixer, cmd & 0xff); if (tmp < 0) return tmp; return put_user(tmp, p); } return -ENXIO; } static long snd_mixer_oss_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return snd_mixer_oss_ioctl1(file->private_data, cmd, arg); } int snd_mixer_oss_ioctl_card(struct snd_card *card, unsigned int cmd, unsigned long arg) { struct snd_mixer_oss_file fmixer; if (snd_BUG_ON(!card)) return -ENXIO; if (card->mixer_oss == NULL) return -ENXIO; memset(&fmixer, 0, sizeof(fmixer)); fmixer.card = card; fmixer.mixer = card->mixer_oss; return snd_mixer_oss_ioctl1(&fmixer, cmd, arg); } EXPORT_SYMBOL(snd_mixer_oss_ioctl_card); #ifdef CONFIG_COMPAT /* all compatible */ static long snd_mixer_oss_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return snd_mixer_oss_ioctl1(file->private_data, cmd, (unsigned long)compat_ptr(arg)); } #else #define snd_mixer_oss_ioctl_compat NULL #endif /* * 
REGISTRATION PART */ static const struct file_operations snd_mixer_oss_f_ops = { .owner = THIS_MODULE, .open = snd_mixer_oss_open, .release = snd_mixer_oss_release, .unlocked_ioctl = snd_mixer_oss_ioctl, .compat_ioctl = snd_mixer_oss_ioctl_compat, }; /* * utilities */ static long snd_mixer_oss_conv(long val, long omin, long omax, long nmin, long nmax) { long orange = omax - omin, nrange = nmax - nmin; if (orange == 0) return 0; return DIV_ROUND_CLOSEST(nrange * (val - omin), orange) + nmin; } /* convert from alsa native to oss values (0-100) */ static long snd_mixer_oss_conv1(long val, long min, long max, int *old) { if (val == snd_mixer_oss_conv(*old, 0, 100, min, max)) return *old; return snd_mixer_oss_conv(val, min, max, 0, 100); } /* convert from oss to alsa native values */ static long snd_mixer_oss_conv2(long val, long min, long max) { return snd_mixer_oss_conv(val, 0, 100, min, max); } #if 0 static void snd_mixer_oss_recsrce_set(struct snd_card *card, int slot) { struct snd_mixer_oss *mixer = card->mixer_oss; if (mixer) mixer->mask_recsrc |= 1 << slot; } static int snd_mixer_oss_recsrce_get(struct snd_card *card, int slot) { struct snd_mixer_oss *mixer = card->mixer_oss; if (mixer && (mixer->mask_recsrc & (1 << slot))) return 1; return 0; } #endif #define SNDRV_MIXER_OSS_SIGNATURE 0x65999250 #define SNDRV_MIXER_OSS_ITEM_GLOBAL 0 #define SNDRV_MIXER_OSS_ITEM_GSWITCH 1 #define SNDRV_MIXER_OSS_ITEM_GROUTE 2 #define SNDRV_MIXER_OSS_ITEM_GVOLUME 3 #define SNDRV_MIXER_OSS_ITEM_PSWITCH 4 #define SNDRV_MIXER_OSS_ITEM_PROUTE 5 #define SNDRV_MIXER_OSS_ITEM_PVOLUME 6 #define SNDRV_MIXER_OSS_ITEM_CSWITCH 7 #define SNDRV_MIXER_OSS_ITEM_CROUTE 8 #define SNDRV_MIXER_OSS_ITEM_CVOLUME 9 #define SNDRV_MIXER_OSS_ITEM_CAPTURE 10 #define SNDRV_MIXER_OSS_ITEM_COUNT 11 #define SNDRV_MIXER_OSS_PRESENT_GLOBAL (1<<0) #define SNDRV_MIXER_OSS_PRESENT_GSWITCH (1<<1) #define SNDRV_MIXER_OSS_PRESENT_GROUTE (1<<2) #define SNDRV_MIXER_OSS_PRESENT_GVOLUME (1<<3) #define 
SNDRV_MIXER_OSS_PRESENT_PSWITCH (1<<4) #define SNDRV_MIXER_OSS_PRESENT_PROUTE (1<<5) #define SNDRV_MIXER_OSS_PRESENT_PVOLUME (1<<6) #define SNDRV_MIXER_OSS_PRESENT_CSWITCH (1<<7) #define SNDRV_MIXER_OSS_PRESENT_CROUTE (1<<8) #define SNDRV_MIXER_OSS_PRESENT_CVOLUME (1<<9) #define SNDRV_MIXER_OSS_PRESENT_CAPTURE (1<<10) struct slot { unsigned int signature; unsigned int present; unsigned int channels; unsigned int numid[SNDRV_MIXER_OSS_ITEM_COUNT]; unsigned int capture_item; const struct snd_mixer_oss_assign_table *assigned; unsigned int allocated: 1; }; #define ID_UNKNOWN ((unsigned int)-1) static struct snd_kcontrol *snd_mixer_oss_test_id(struct snd_mixer_oss *mixer, const char *name, int index) { struct snd_card *card = mixer->card; struct snd_ctl_elem_id id; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; strscpy(id.name, name, sizeof(id.name)); id.index = index; return snd_ctl_find_id(card, &id); } static void snd_mixer_oss_get_volume1_vol(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int *left, int *right) { struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; if (numid == ID_UNKNOWN) return; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, numid); if (!kctl) return; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return; if (kctl->info(kctl, uinfo)) return; if (kctl->get(kctl, uctl)) return; if (uinfo->type == SNDRV_CTL_ELEM_TYPE_BOOLEAN && uinfo->value.integer.min == 0 && uinfo->value.integer.max == 1) return; *left = snd_mixer_oss_conv1(uctl->value.integer.value[0], uinfo->value.integer.min, uinfo->value.integer.max, &pslot->volume[0]); if (uinfo->count > 1) *right = snd_mixer_oss_conv1(uctl->value.integer.value[1], uinfo->value.integer.min, uinfo->value.integer.max, 
&pslot->volume[1]); } static void snd_mixer_oss_get_volume1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int *left, int *right, int route) { struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; if (numid == ID_UNKNOWN) return; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, numid); if (!kctl) return; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return; if (kctl->info(kctl, uinfo)) return; if (kctl->get(kctl, uctl)) return; if (!uctl->value.integer.value[0]) { *left = 0; if (uinfo->count == 1) *right = 0; } if (uinfo->count > 1 && !uctl->value.integer.value[route ? 3 : 1]) *right = 0; } static int snd_mixer_oss_get_volume1(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *left, int *right) { struct slot *slot = pslot->private_data; *left = *right = 100; if (slot->present & SNDRV_MIXER_OSS_PRESENT_PVOLUME) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GVOLUME) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GLOBAL) { snd_mixer_oss_get_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GLOBAL], left, right); } if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, 
slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) { snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } return 0; } static void snd_mixer_oss_put_volume1_vol(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int left, int right) { struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; int res; if (numid == ID_UNKNOWN) return; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, numid); if (!kctl) return; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return; if (kctl->info(kctl, uinfo)) return; if (uinfo->type == SNDRV_CTL_ELEM_TYPE_BOOLEAN && uinfo->value.integer.min == 0 && uinfo->value.integer.max == 1) return; uctl->value.integer.value[0] = snd_mixer_oss_conv2(left, uinfo->value.integer.min, uinfo->value.integer.max); if (uinfo->count > 1) uctl->value.integer.value[1] = snd_mixer_oss_conv2(right, uinfo->value.integer.min, uinfo->value.integer.max); res = kctl->put(kctl, uctl); if (res < 0) return; if (res > 0) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); } static void snd_mixer_oss_put_volume1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, unsigned int numid, int left, int right, int route) { struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; struct snd_kcontrol *kctl; struct snd_card *card = fmixer->card; int res; if (numid == ID_UNKNOWN) return; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_ctl_find_numid(card, numid); if (!kctl) return; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return; if (kctl->info(kctl, 
uinfo)) return; if (uinfo->count > 1) { uctl->value.integer.value[0] = left > 0 ? 1 : 0; uctl->value.integer.value[route ? 3 : 1] = right > 0 ? 1 : 0; if (route) { uctl->value.integer.value[1] = uctl->value.integer.value[2] = 0; } } else { uctl->value.integer.value[0] = (left > 0 || right > 0) ? 1 : 0; } res = kctl->put(kctl, uctl); if (res < 0) return; if (res > 0) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); } static int snd_mixer_oss_put_volume1(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int left, int right) { struct slot *slot = pslot->private_data; if (slot->present & SNDRV_MIXER_OSS_PRESENT_PVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PVOLUME], left, right); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CVOLUME) snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GVOLUME) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GVOLUME], left, right); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GLOBAL) { snd_mixer_oss_put_volume1_vol(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GLOBAL], left, right); } if (left || right) { if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, 
slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); if (slot->present & SNDRV_MIXER_OSS_PRESENT_CROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], left, right, 1); if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } else { if (slot->present & SNDRV_MIXER_OSS_PRESENT_PSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GSWITCH) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GSWITCH], left, right, 0); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_PROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_PROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_CROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], left, right, 1); } else if (slot->present & SNDRV_MIXER_OSS_PRESENT_GROUTE) { snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_GROUTE], left, right, 1); } } return 0; } static int snd_mixer_oss_get_recsrc1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *active) { struct slot *slot = pslot->private_data; int left, right; left = right = 1; snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], &left, &right, 0); *active = (left || right) ? 
1 : 0; return 0; } static int snd_mixer_oss_get_recsrc1_route(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int *active) { struct slot *slot = pslot->private_data; int left, right; left = right = 1; snd_mixer_oss_get_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], &left, &right, 1); *active = (left || right) ? 1 : 0; return 0; } static int snd_mixer_oss_put_recsrc1_sw(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int active) { struct slot *slot = pslot->private_data; snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CSWITCH], active, active, 0); return 0; } static int snd_mixer_oss_put_recsrc1_route(struct snd_mixer_oss_file *fmixer, struct snd_mixer_oss_slot *pslot, int active) { struct slot *slot = pslot->private_data; snd_mixer_oss_put_volume1_sw(fmixer, pslot, slot->numid[SNDRV_MIXER_OSS_ITEM_CROUTE], active, active, 1); return 0; } static int snd_mixer_oss_get_recsrc2(struct snd_mixer_oss_file *fmixer, unsigned int *active_index) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *pslot; struct slot *slot; struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; int err, idx; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return -ENOMEM; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (!kctl) return -ENOENT; err = kctl->info(kctl, uinfo); if (err < 0) return err; err = kctl->get(kctl, uctl); if (err < 0) return err; for (idx = 0; idx < 32; idx++) { if (!(mixer->mask_recsrc & (1 << idx))) continue; pslot = &mixer->slots[idx]; slot = pslot->private_data; if (slot->signature != SNDRV_MIXER_OSS_SIGNATURE) continue; if (!(slot->present & SNDRV_MIXER_OSS_PRESENT_CAPTURE)) continue; if (slot->capture_item == 
uctl->value.enumerated.item[0]) { *active_index = idx; break; } } return 0; } static int snd_mixer_oss_put_recsrc2(struct snd_mixer_oss_file *fmixer, unsigned int active_index) { struct snd_card *card = fmixer->card; struct snd_mixer_oss *mixer = fmixer->mixer; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *pslot; struct slot *slot = NULL; struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; struct snd_ctl_elem_value *uctl __free(kfree) = NULL; int err; unsigned int idx; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); uctl = kzalloc(sizeof(*uctl), GFP_KERNEL); if (uinfo == NULL || uctl == NULL) return -ENOMEM; guard(rwsem_read)(&card->controls_rwsem); kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (!kctl) return -ENOENT; err = kctl->info(kctl, uinfo); if (err < 0) return err; for (idx = 0; idx < 32; idx++) { if (!(mixer->mask_recsrc & (1 << idx))) continue; pslot = &mixer->slots[idx]; slot = pslot->private_data; if (slot->signature != SNDRV_MIXER_OSS_SIGNATURE) continue; if (!(slot->present & SNDRV_MIXER_OSS_PRESENT_CAPTURE)) continue; if (idx == active_index) break; slot = NULL; } if (!slot) return 0; for (idx = 0; idx < uinfo->count; idx++) uctl->value.enumerated.item[idx] = slot->capture_item; err = kctl->put(kctl, uctl); if (err > 0) snd_ctl_notify(fmixer->card, SNDRV_CTL_EVENT_MASK_VALUE, &kctl->id); return 0; } struct snd_mixer_oss_assign_table { int oss_id; const char *name; int index; }; static int snd_mixer_oss_build_test(struct snd_mixer_oss *mixer, struct slot *slot, const char *name, int index, int item) { struct snd_ctl_elem_info *info __free(kfree) = NULL; struct snd_kcontrol *kcontrol; struct snd_card *card = mixer->card; int err; scoped_guard(rwsem_read, &card->controls_rwsem) { kcontrol = snd_mixer_oss_test_id(mixer, name, index); if (kcontrol == NULL) return 0; info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; err = kcontrol->info(kcontrol, info); if (err < 0) return err; slot->numid[item] = 
kcontrol->id.numid; } if (info->count > slot->channels) slot->channels = info->count; slot->present |= 1 << item; return 0; } static void snd_mixer_oss_slot_free(struct snd_mixer_oss_slot *chn) { struct slot *p = chn->private_data; if (p) { if (p->allocated && p->assigned) { kfree(p->assigned->name); kfree(p->assigned); } kfree(p); } } static void mixer_slot_clear(struct snd_mixer_oss_slot *rslot) { int idx = rslot->number; /* remember this */ if (rslot->private_free) rslot->private_free(rslot); memset(rslot, 0, sizeof(*rslot)); rslot->number = idx; } /* In a separate function to keep gcc 3.2 happy - do NOT merge this in snd_mixer_oss_build_input! */ static int snd_mixer_oss_build_test_all(struct snd_mixer_oss *mixer, const struct snd_mixer_oss_assign_table *ptr, struct slot *slot) { char str[64]; int err; err = snd_mixer_oss_build_test(mixer, slot, ptr->name, ptr->index, SNDRV_MIXER_OSS_ITEM_GLOBAL); if (err) return err; sprintf(str, "%s Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GSWITCH); if (err) return err; sprintf(str, "%s Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GROUTE); if (err) return err; sprintf(str, "%s Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_GVOLUME); if (err) return err; sprintf(str, "%s Playback Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PSWITCH); if (err) return err; sprintf(str, "%s Playback Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PROUTE); if (err) return err; sprintf(str, "%s Playback Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_PVOLUME); if (err) return err; sprintf(str, "%s Capture Switch", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, 
SNDRV_MIXER_OSS_ITEM_CSWITCH); if (err) return err; sprintf(str, "%s Capture Route", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_CROUTE); if (err) return err; sprintf(str, "%s Capture Volume", ptr->name); err = snd_mixer_oss_build_test(mixer, slot, str, ptr->index, SNDRV_MIXER_OSS_ITEM_CVOLUME); if (err) return err; return 0; } /* * build an OSS mixer element. * ptr_allocated means the entry is dynamically allocated (change via proc file). * when replace_old = 1, the old entry is replaced with the new one. */ static int snd_mixer_oss_build_input(struct snd_mixer_oss *mixer, const struct snd_mixer_oss_assign_table *ptr, int ptr_allocated, int replace_old) { struct slot slot; struct slot *pslot; struct snd_kcontrol *kctl; struct snd_mixer_oss_slot *rslot; const char *str; /* check if already assigned */ if (mixer->slots[ptr->oss_id].get_volume && ! replace_old) return 0; memset(&slot, 0, sizeof(slot)); memset(slot.numid, 0xff, sizeof(slot.numid)); /* ID_UNKNOWN */ if (snd_mixer_oss_build_test_all(mixer, ptr, &slot)) return 0; guard(rwsem_read)(&mixer->card->controls_rwsem); kctl = NULL; if (!ptr->index) kctl = snd_mixer_oss_test_id(mixer, "Capture Source", 0); if (kctl) { struct snd_ctl_elem_info *uinfo __free(kfree) = NULL; uinfo = kzalloc(sizeof(*uinfo), GFP_KERNEL); if (!uinfo) return -ENOMEM; if (kctl->info(kctl, uinfo)) return 0; str = ptr->name; if (!strcmp(str, "Master")) str = "Mix"; else if (!strcmp(str, "Master Mono")) str = "Mix Mono"; slot.capture_item = 0; if (!strcmp(uinfo->value.enumerated.name, str)) { slot.present |= SNDRV_MIXER_OSS_PRESENT_CAPTURE; } else { for (slot.capture_item = 1; slot.capture_item < uinfo->value.enumerated.items; slot.capture_item++) { uinfo->value.enumerated.item = slot.capture_item; if (kctl->info(kctl, uinfo)) return 0; if (!strcmp(uinfo->value.enumerated.name, str)) { slot.present |= SNDRV_MIXER_OSS_PRESENT_CAPTURE; break; } } } } if (slot.present != 0) { pslot = 
kmalloc(sizeof(slot), GFP_KERNEL); if (! pslot) return -ENOMEM; *pslot = slot; pslot->signature = SNDRV_MIXER_OSS_SIGNATURE; pslot->assigned = ptr; pslot->allocated = ptr_allocated; rslot = &mixer->slots[ptr->oss_id]; mixer_slot_clear(rslot); rslot->stereo = slot.channels > 1 ? 1 : 0; rslot->get_volume = snd_mixer_oss_get_volume1; rslot->put_volume = snd_mixer_oss_put_volume1; /* note: ES18xx have both Capture Source and XX Capture Volume !!! */ if (slot.present & SNDRV_MIXER_OSS_PRESENT_CSWITCH) { rslot->get_recsrc = snd_mixer_oss_get_recsrc1_sw; rslot->put_recsrc = snd_mixer_oss_put_recsrc1_sw; } else if (slot.present & SNDRV_MIXER_OSS_PRESENT_CROUTE) { rslot->get_recsrc = snd_mixer_oss_get_recsrc1_route; rslot->put_recsrc = snd_mixer_oss_put_recsrc1_route; } else if (slot.present & SNDRV_MIXER_OSS_PRESENT_CAPTURE) { mixer->mask_recsrc |= 1 << ptr->oss_id; } rslot->private_data = pslot; rslot->private_free = snd_mixer_oss_slot_free; return 1; } return 0; } #ifdef CONFIG_SND_PROC_FS /* */ #define MIXER_VOL(name) [SOUND_MIXER_##name] = #name static const char * const oss_mixer_names[SNDRV_OSS_MAX_MIXERS] = { MIXER_VOL(VOLUME), MIXER_VOL(BASS), MIXER_VOL(TREBLE), MIXER_VOL(SYNTH), MIXER_VOL(PCM), MIXER_VOL(SPEAKER), MIXER_VOL(LINE), MIXER_VOL(MIC), MIXER_VOL(CD), MIXER_VOL(IMIX), MIXER_VOL(ALTPCM), MIXER_VOL(RECLEV), MIXER_VOL(IGAIN), MIXER_VOL(OGAIN), MIXER_VOL(LINE1), MIXER_VOL(LINE2), MIXER_VOL(LINE3), MIXER_VOL(DIGITAL1), MIXER_VOL(DIGITAL2), MIXER_VOL(DIGITAL3), MIXER_VOL(PHONEIN), MIXER_VOL(PHONEOUT), MIXER_VOL(VIDEO), MIXER_VOL(RADIO), MIXER_VOL(MONITOR), }; /* * /proc interface */ static void snd_mixer_oss_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_mixer_oss *mixer = entry->private_data; int i; guard(mutex)(&mixer->reg_mutex); for (i = 0; i < SNDRV_OSS_MAX_MIXERS; i++) { struct slot *p; if (! 
oss_mixer_names[i]) continue; p = (struct slot *)mixer->slots[i].private_data; snd_iprintf(buffer, "%s ", oss_mixer_names[i]); if (p && p->assigned) snd_iprintf(buffer, "\"%s\" %d\n", p->assigned->name, p->assigned->index); else snd_iprintf(buffer, "\"\" 0\n"); } } static void snd_mixer_oss_proc_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_mixer_oss *mixer = entry->private_data; char line[128], str[32], idxstr[16]; const char *cptr; unsigned int idx; int ch; struct snd_mixer_oss_assign_table *tbl; struct slot *slot; while (!snd_info_get_line(buffer, line, sizeof(line))) { cptr = snd_info_get_str(str, line, sizeof(str)); for (ch = 0; ch < SNDRV_OSS_MAX_MIXERS; ch++) if (oss_mixer_names[ch] && strcmp(oss_mixer_names[ch], str) == 0) break; if (ch >= SNDRV_OSS_MAX_MIXERS) { pr_err("ALSA: mixer_oss: invalid OSS volume '%s'\n", str); continue; } cptr = snd_info_get_str(str, cptr, sizeof(str)); if (! *str) { /* remove the entry */ scoped_guard(mutex, &mixer->reg_mutex) mixer_slot_clear(&mixer->slots[ch]); continue; } snd_info_get_str(idxstr, cptr, sizeof(idxstr)); idx = simple_strtoul(idxstr, NULL, 10); if (idx >= 0x4000) { /* too big */ pr_err("ALSA: mixer_oss: invalid index %d\n", idx); continue; } scoped_guard(mutex, &mixer->reg_mutex) { slot = (struct slot *)mixer->slots[ch].private_data; if (slot && slot->assigned && slot->assigned->index == idx && !strcmp(slot->assigned->name, str)) /* not changed */ break; tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); if (!tbl) break; tbl->oss_id = ch; tbl->name = kstrdup(str, GFP_KERNEL); if (!tbl->name) { kfree(tbl); break; } tbl->index = idx; if (snd_mixer_oss_build_input(mixer, tbl, 1, 1) <= 0) { kfree(tbl->name); kfree(tbl); } } } } static void snd_mixer_oss_proc_init(struct snd_mixer_oss *mixer) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(mixer->card, "oss_mixer", mixer->card->proc_root); if (! 
entry) return; entry->content = SNDRV_INFO_CONTENT_TEXT; entry->mode = S_IFREG | 0644; entry->c.text.read = snd_mixer_oss_proc_read; entry->c.text.write = snd_mixer_oss_proc_write; entry->private_data = mixer; if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } mixer->proc_entry = entry; } static void snd_mixer_oss_proc_done(struct snd_mixer_oss *mixer) { snd_info_free_entry(mixer->proc_entry); mixer->proc_entry = NULL; } #else /* !CONFIG_SND_PROC_FS */ #define snd_mixer_oss_proc_init(mix) #define snd_mixer_oss_proc_done(mix) #endif /* CONFIG_SND_PROC_FS */ static void snd_mixer_oss_build(struct snd_mixer_oss *mixer) { static const struct snd_mixer_oss_assign_table table[] = { { SOUND_MIXER_VOLUME, "Master", 0 }, { SOUND_MIXER_VOLUME, "Front", 0 }, /* fallback */ { SOUND_MIXER_BASS, "Tone Control - Bass", 0 }, { SOUND_MIXER_TREBLE, "Tone Control - Treble", 0 }, { SOUND_MIXER_SYNTH, "Synth", 0 }, { SOUND_MIXER_SYNTH, "FM", 0 }, /* fallback */ { SOUND_MIXER_SYNTH, "Music", 0 }, /* fallback */ { SOUND_MIXER_PCM, "PCM", 0 }, { SOUND_MIXER_SPEAKER, "Beep", 0 }, { SOUND_MIXER_SPEAKER, "PC Speaker", 0 }, /* fallback */ { SOUND_MIXER_SPEAKER, "Speaker", 0 }, /* fallback */ { SOUND_MIXER_LINE, "Line", 0 }, { SOUND_MIXER_MIC, "Mic", 0 }, { SOUND_MIXER_CD, "CD", 0 }, { SOUND_MIXER_IMIX, "Monitor Mix", 0 }, { SOUND_MIXER_ALTPCM, "PCM", 1 }, { SOUND_MIXER_ALTPCM, "Headphone", 0 }, /* fallback */ { SOUND_MIXER_ALTPCM, "Wave", 0 }, /* fallback */ { SOUND_MIXER_RECLEV, "-- nothing --", 0 }, { SOUND_MIXER_IGAIN, "Capture", 0 }, { SOUND_MIXER_OGAIN, "Playback", 0 }, { SOUND_MIXER_LINE1, "Aux", 0 }, { SOUND_MIXER_LINE2, "Aux", 1 }, { SOUND_MIXER_LINE3, "Aux", 2 }, { SOUND_MIXER_DIGITAL1, "Digital", 0 }, { SOUND_MIXER_DIGITAL1, "IEC958", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL1, "IEC958 Optical", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL1, "IEC958 Coaxial", 0 }, /* fallback */ { SOUND_MIXER_DIGITAL2, "Digital", 1 }, { SOUND_MIXER_DIGITAL3, "Digital", 2 }, 
{ SOUND_MIXER_PHONEIN, "Phone", 0 }, { SOUND_MIXER_PHONEOUT, "Master Mono", 0 }, { SOUND_MIXER_PHONEOUT, "Speaker", 0 }, /*fallback*/ { SOUND_MIXER_PHONEOUT, "Mono", 0 }, /*fallback*/ { SOUND_MIXER_PHONEOUT, "Phone", 0 }, /* fallback */ { SOUND_MIXER_VIDEO, "Video", 0 }, { SOUND_MIXER_RADIO, "Radio", 0 }, { SOUND_MIXER_MONITOR, "Monitor", 0 } }; unsigned int idx; for (idx = 0; idx < ARRAY_SIZE(table); idx++) snd_mixer_oss_build_input(mixer, &table[idx], 0, 0); if (mixer->mask_recsrc) { mixer->get_recsrc = snd_mixer_oss_get_recsrc2; mixer->put_recsrc = snd_mixer_oss_put_recsrc2; } } /* * */ static int snd_mixer_oss_free1(void *private) { struct snd_mixer_oss *mixer = private; struct snd_card *card; int idx; if (!mixer) return 0; card = mixer->card; if (snd_BUG_ON(mixer != card->mixer_oss)) return -ENXIO; card->mixer_oss = NULL; for (idx = 0; idx < SNDRV_OSS_MAX_MIXERS; idx++) { struct snd_mixer_oss_slot *chn = &mixer->slots[idx]; if (chn->private_free) chn->private_free(chn); } kfree(mixer); return 0; } static int snd_mixer_oss_notify_handler(struct snd_card *card, int cmd) { struct snd_mixer_oss *mixer; if (cmd == SND_MIXER_OSS_NOTIFY_REGISTER) { int idx, err; mixer = kcalloc(2, sizeof(*mixer), GFP_KERNEL); if (mixer == NULL) return -ENOMEM; mutex_init(&mixer->reg_mutex); err = snd_register_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, card, 0, &snd_mixer_oss_f_ops, card); if (err < 0) { dev_err(card->dev, "unable to register OSS mixer device %i:%i\n", card->number, 0); kfree(mixer); return err; } mixer->oss_dev_alloc = 1; mixer->card = card; if (*card->mixername) strscpy(mixer->name, card->mixername, sizeof(mixer->name)); else snprintf(mixer->name, sizeof(mixer->name), "mixer%i", card->number); #ifdef SNDRV_OSS_INFO_DEV_MIXERS snd_oss_info_register(SNDRV_OSS_INFO_DEV_MIXERS, card->number, mixer->name); #endif for (idx = 0; idx < SNDRV_OSS_MAX_MIXERS; idx++) mixer->slots[idx].number = idx; card->mixer_oss = mixer; snd_mixer_oss_build(mixer); 
snd_mixer_oss_proc_init(mixer); } else { mixer = card->mixer_oss; if (mixer == NULL) return 0; if (mixer->oss_dev_alloc) { #ifdef SNDRV_OSS_INFO_DEV_MIXERS snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number); #endif snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0); mixer->oss_dev_alloc = 0; } if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) return 0; snd_mixer_oss_proc_done(mixer); return snd_mixer_oss_free1(mixer); } return 0; } static int __init alsa_mixer_oss_init(void) { struct snd_card *card; int idx; snd_mixer_oss_notify_callback = snd_mixer_oss_notify_handler; for (idx = 0; idx < SNDRV_CARDS; idx++) { card = snd_card_ref(idx); if (card) { snd_mixer_oss_notify_handler(card, SND_MIXER_OSS_NOTIFY_REGISTER); snd_card_unref(card); } } return 0; } static void __exit alsa_mixer_oss_exit(void) { struct snd_card *card; int idx; snd_mixer_oss_notify_callback = NULL; for (idx = 0; idx < SNDRV_CARDS; idx++) { card = snd_card_ref(idx); if (card) { snd_mixer_oss_notify_handler(card, SND_MIXER_OSS_NOTIFY_FREE); snd_card_unref(card); } } } module_init(alsa_mixer_oss_init) module_exit(alsa_mixer_oss_exit)
36 36 36 36 36 25 25 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. * * This file contains power management functions related to interrupts. */ #include <linux/irq.h> #include <linux/module.h> #include <linux/interrupt.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> #include "internals.h" void irq_pm_handle_wakeup(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; desc->depth++; irq_disable(desc); pm_system_irq_wakeup(irq_desc_get_irq(desc)); } /* * Called from __setup_irq() with desc->lock held after @action has * been installed in the action chain. 
*/ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { desc->nr_actions++; if (action->flags & IRQF_FORCE_RESUME) desc->force_resume_depth++; WARN_ON_ONCE(desc->force_resume_depth && desc->force_resume_depth != desc->nr_actions); if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth++; else if (action->flags & IRQF_COND_SUSPEND) desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions); } /* * Called from __free_irq() with desc->lock held after @action has * been removed from the action chain. */ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { desc->nr_actions--; if (action->flags & IRQF_FORCE_RESUME) desc->force_resume_depth--; if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth--; else if (action->flags & IRQF_COND_SUSPEND) desc->cond_suspend_depth--; } static bool suspend_device_irq(struct irq_desc *desc) { unsigned long chipflags = irq_desc_get_chip(desc)->flags; struct irq_data *irqd = &desc->irq_data; if (!desc->action || irq_desc_is_chained(desc) || desc->no_suspend_depth) return false; if (irqd_is_wakeup_set(irqd)) { irqd_set(irqd, IRQD_WAKEUP_ARMED); if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) && irqd_irq_disabled(irqd)) { /* * Interrupt marked for wakeup is in disabled state. * Enable interrupt here to unmask/enable in irqchip * to be able to resume with such interrupts. */ __enable_irq(desc); irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); } /* * We return true here to force the caller to issue * synchronize_irq(). We need to make sure that the * IRQD_WAKEUP_ARMED is visible before we return from * suspend_device_irqs(). */ return true; } desc->istate |= IRQS_SUSPENDED; __disable_irq(desc); /* * Hardware which has no wakeup source configuration facility * requires that the non wakeup interrupts are masked at the * chip level. The chip implementation indicates that with * IRQCHIP_MASK_ON_SUSPEND. 
*/ if (chipflags & IRQCHIP_MASK_ON_SUSPEND) mask_irq(desc); return true; } /** * suspend_device_irqs - disable all currently enabled interrupt lines * * During system-wide suspend or hibernation device drivers need to be * prevented from receiving interrupts and this function is provided * for this purpose. * * So we disable all interrupts and mark them IRQS_SUSPENDED except * for those which are unused, those which are marked as not * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND * set and those which are marked as active wakeup sources. * * The active wakeup sources are handled by the flow handler entry * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the * interrupt and notifies the pm core about the wakeup. */ void suspend_device_irqs(void) { struct irq_desc *desc; int irq; for_each_irq_desc(irq, desc) { bool sync; if (irq_settings_is_nested_thread(desc)) continue; scoped_guard(raw_spinlock_irqsave, &desc->lock) sync = suspend_device_irq(desc); if (sync) synchronize_irq(irq); } } static void resume_irq(struct irq_desc *desc) { struct irq_data *irqd = &desc->irq_data; irqd_clear(irqd, IRQD_WAKEUP_ARMED); if (irqd_is_enabled_on_suspend(irqd)) { /* * Interrupt marked for wakeup was enabled during suspend * entry. Disable such interrupts to restore them back to * original state. */ __disable_irq(desc); irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); } if (desc->istate & IRQS_SUSPENDED) goto resume; /* Force resume the interrupt? */ if (!desc->force_resume_depth) return; /* Pretend that it got disabled ! 
*/ desc->depth++; irq_state_set_disabled(desc); irq_state_set_masked(desc); resume: desc->istate &= ~IRQS_SUSPENDED; __enable_irq(desc); } static void resume_irqs(bool want_early) { struct irq_desc *desc; int irq; for_each_irq_desc(irq, desc) { bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME; if (!is_early && want_early) continue; if (irq_settings_is_nested_thread(desc)) continue; guard(raw_spinlock_irqsave)(&desc->lock); resume_irq(desc); } } /** * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup * @irq: Interrupt to rearm */ void rearm_wake_irq(unsigned int irq) { scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { struct irq_desc *desc = scoped_irqdesc; if (!(desc->istate & IRQS_SUSPENDED) || !irqd_is_wakeup_set(&desc->irq_data)) return; desc->istate &= ~IRQS_SUSPENDED; irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); __enable_irq(desc); } } /** * irq_pm_syscore_resume - enable interrupt lines early * * Enable all interrupt lines with %IRQF_EARLY_RESUME set. */ static void irq_pm_syscore_resume(void) { resume_irqs(true); } static struct syscore_ops irq_pm_syscore_ops = { .resume = irq_pm_syscore_resume, }; static int __init irq_pm_init_ops(void) { register_syscore_ops(&irq_pm_syscore_ops); return 0; } device_initcall(irq_pm_init_ops); /** * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() * * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag * set as well as those with %IRQF_FORCE_RESUME. */ void resume_device_irqs(void) { resume_irqs(false); }
27 2 1 220 217 220 22 22 3 3 3 1 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 // SPDX-License-Identifier: GPL-2.0-only /* * kvm asynchronous fault support * * Copyright 2010 Red Hat, Inc. 
* * Author: * Gleb Natapov <gleb@redhat.com> */ #include <linux/kvm_host.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/mmu_context.h> #include <linux/sched/mm.h> #include "async_pf.h" #include <trace/events/kvm.h> static struct kmem_cache *async_pf_cache; int kvm_async_pf_init(void) { async_pf_cache = KMEM_CACHE(kvm_async_pf, 0); if (!async_pf_cache) return -ENOMEM; return 0; } void kvm_async_pf_deinit(void) { kmem_cache_destroy(async_pf_cache); async_pf_cache = NULL; } void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) { INIT_LIST_HEAD(&vcpu->async_pf.done); INIT_LIST_HEAD(&vcpu->async_pf.queue); spin_lock_init(&vcpu->async_pf.lock); } static void async_pf_execute(struct work_struct *work) { struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct kvm_vcpu *vcpu = apf->vcpu; struct mm_struct *mm = vcpu->kvm->mm; unsigned long addr = apf->addr; gpa_t cr2_or_gpa = apf->cr2_or_gpa; int locked = 1; bool first; might_sleep(); /* * Attempt to pin the VM's host address space, and simply skip gup() if * acquiring a pin fail, i.e. if the process is exiting. Note, KVM * holds a reference to its associated mm_struct until the very end of * kvm_destroy_vm(), i.e. the struct itself won't be freed before this * work item is fully processed. */ if (mmget_not_zero(mm)) { mmap_read_lock(mm); get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked); if (locked) mmap_read_unlock(mm); mmput(mm); } /* * Notify and kick the vCPU even if faulting in the page failed, e.g. * so that the vCPU can retry the fault synchronously. */ if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC)) kvm_arch_async_page_present(vcpu, apf); spin_lock(&vcpu->async_pf.lock); first = list_empty(&vcpu->async_pf.done); list_add_tail(&apf->link, &vcpu->async_pf.done); spin_unlock(&vcpu->async_pf.lock); /* * The apf struct may be freed by kvm_check_async_pf_completion() as * soon as the lock is dropped. Nullify it to prevent improper usage. 
*/
	apf = NULL;

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	trace_kvm_async_pf_completed(addr, cr2_or_gpa);

	__kvm_vcpu_wake_up(vcpu);
}

/*
 * Release one async #PF work item back to async_pf_cache.
 *
 * A regular item is flushed first so that its work function has fully
 * returned before the memory is freed.  A "wakeup all" item is not flushed;
 * it is only sanity-checked for never having been given a work function.
 */
static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
{
	/*
	 * The async #PF is "done", but KVM must wait for the work item itself,
	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
	 * KVM must ensure *no* code owned by the KVM (the module) can be run
	 * after the last call to module_put().  Note, flushing the work item
	 * is always required when the item is taken off the completion queue.
	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
	 * could be terminated before async_pf_execute() completes.
	 *
	 * Wake all events skip the queue and go straight to the done list,
	 * i.e. don't need to be flushed (but sanity check that the work
	 * wasn't queued).
	 */
	if (work->wakeup_all)
		WARN_ON_ONCE(work->work.func);
	else
		flush_work(&work->work);
	kmem_cache_free(async_pf_cache, work);
}

/*
 * Drop every async #PF item still associated with @vcpu.
 *
 * First empties vcpu->async_pf.queue (items that were handed to the
 * workqueue), then empties vcpu->async_pf.done (items awaiting delivery),
 * and finally resets the per-vCPU outstanding counter to zero.
 */
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
	/* cancel outstanding work queue item */
	while (!list_empty(&vcpu->async_pf.queue)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.queue,
					 typeof(*work), queue);
		list_del(&work->queue);

#ifdef CONFIG_KVM_ASYNC_PF_SYNC
		/* Sync variant: just wait for the work function to finish. */
		flush_work(&work->work);
#else
		/*
		 * cancel_work_sync() returns true when the item was still
		 * pending, i.e. its work function never started.
		 * NOTE(review): presumably such an item never reached the
		 * done list and therefore has to be freed here, while items
		 * that did run are freed by the loop below - confirm against
		 * async_pf_execute().
		 */
		if (cancel_work_sync(&work->work))
			kmem_cache_free(async_pf_cache, work);
#endif
	}

	spin_lock(&vcpu->async_pf.lock);
	while (!list_empty(&vcpu->async_pf.done)) {
		struct kvm_async_pf *work =
			list_first_entry(&vcpu->async_pf.done,
					 typeof(*work), link);
		list_del(&work->link);

		/*
		 * The helper may wait in flush_work(), so the spinlock is
		 * released around the call and re-taken before the list is
		 * examined again.
		 */
		spin_unlock(&vcpu->async_pf.lock);
		kvm_flush_and_free_async_pf_work(work);
		spin_lock(&vcpu->async_pf.lock);
	}
	spin_unlock(&vcpu->async_pf.lock);

	vcpu->async_pf.queued = 0;
}

/*
 * Deliver completed async #PF items to @vcpu.
 *
 * Keeps going while the done list is non-empty and the arch code reports
 * that another "page present" event can be dequeued.  Each item is removed
 * from the done list under async_pf.lock, reported to the arch hooks,
 * unlinked from the per-vCPU queue, accounted for in async_pf.queued and
 * then flushed and freed.
 */
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;

	/* The emptiness test is done without holding async_pf.lock. */
	while (!list_empty_careful(&vcpu->async_pf.done) &&
	      kvm_arch_can_dequeue_async_page_present(vcpu)) {
		spin_lock(&vcpu->async_pf.lock);
		work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
					link);
		list_del(&work->link);
		spin_unlock(&vcpu->async_pf.lock);

		kvm_arch_async_page_ready(vcpu, work);
		/* Only the non-sync variant signals "page present" from here. */
		if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
			kvm_arch_async_page_present(vcpu, work);

		list_del(&work->queue);
		vcpu->async_pf.queued--;
		kvm_flush_and_free_async_pf_work(work);
	}
}

/*
 * Try to schedule a job to handle page fault asynchronously. Returns 'true' on
 * success, 'false' on failure (page fault has to be handled synchronously).
 *
 * Failure cases visible here: the vCPU already has ASYNC_PF_PER_VCPU items
 * outstanding, @hva is an error hva, or the work item cannot be allocated
 * without sleeping.
 */
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
			unsigned long hva, struct kvm_arch_async_pf *arch)
{
	struct kvm_async_pf *work;

	if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
		return false;

	/* Arch specific code should not do async PF in this case */
	if (unlikely(kvm_is_error_hva(hva)))
		return false;

	/*
	 * do alloc nowait since if we are going to sleep anyway we
	 * may as well sleep faulting in page
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
	if (!work)
		return false;

	work->wakeup_all = false;
	work->vcpu = vcpu;
	work->cr2_or_gpa = cr2_or_gpa;
	work->addr = hva;
	/* Arch data is copied by value into the work item. */
	work->arch = *arch;

	INIT_WORK(&work->work, async_pf_execute);

	/* Track the item on the vCPU before it is handed to the workqueue. */
	list_add_tail(&work->queue, &vcpu->async_pf.queue);
	vcpu->async_pf.queued++;
	work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);

	schedule_work(&work->work);

	return true;
}

/*
 * Put a "wakeup all" item on @vcpu's done list.
 *
 * Returns 0 without doing anything when the done list already holds an
 * item, 0 after queueing the new item, or -ENOMEM if it cannot be
 * allocated.
 */
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
	struct kvm_async_pf *work;
	bool first;

	if (!list_empty_careful(&vcpu->async_pf.done))
		return 0;

	/*
	 * Zeroed allocation and no INIT_WORK(): work->work.func stays NULL,
	 * which is what kvm_flush_and_free_async_pf_work() checks for
	 * wakeup_all items.
	 */
	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
	if (!work)
		return -ENOMEM;

	work->wakeup_all = true;
	INIT_LIST_HEAD(&work->queue); /* for list_del to work */

	spin_lock(&vcpu->async_pf.lock);
	first = list_empty(&vcpu->async_pf.done);
	list_add_tail(&work->link, &vcpu->async_pf.done);
	spin_unlock(&vcpu->async_pf.lock);

	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
		kvm_arch_async_page_present_queued(vcpu);

	/* Balances the decrement done when the item is dequeued. */
	vcpu->async_pf.queued++;
	return 0;
}
9 3 6 1 5 1 1 3 1 5 1 1 1 1 2 1 1 1 8 2 8 1 1 1 1 9 1 3 8 1 1 1 1 7 1 1 8 1 9 19 12 17 15 2 17 9 9 8 1 1 1 1 2 1 1 2 2 2 2 1 1 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 
1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 
1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 
2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 
2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 // SPDX-License-Identifier: GPL-2.0+ /* * USB FTDI SIO driver * * Copyright (C) 2009 - 2013 * Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2001 * Greg Kroah-Hartman (greg@kroah.com) * Bill Ryder (bryder@sgi.com) * Copyright (C) 2002 * Kuba Ober (kuba@mareimbrium.org) 
*
 * See Documentation/usb/usb-serial.rst for more information on using this
 * driver
 *
 * See http://ftdi-usb-sio.sourceforge.net for up to date testing info
 *	and extra documentation
 *
 * Change entries from 2004 and earlier can be found in versions of this
 * file in kernel versions prior to the 2.6.24 release.
 *
 */

/* Bill Ryder - bryder@sgi.com - wrote the FTDI_SIO implementation */
/* Thanx to FTDI for so kindly providing details of the protocol required */
/*   to talk to the device */
/* Thanx to gkh and the rest of the usb dev group for all code I have
   assimilated :-) */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/tty_flip.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
#include <linux/serial.h>
#include <linux/gpio/driver.h>
#include <linux/usb/serial.h>
#include "ftdi_sio.h"
#include "ftdi_sio_ids.h"

#define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr, Johan Hovold <jhovold@gmail.com>"
#define DRIVER_DESC "USB FTDI Serial Converters Driver"

/*
 * Chip variants known to this driver.  A value of this type is kept in
 * ftdi_private.chip_type.
 */
enum ftdi_chip_type {
	SIO,
	FT232A,
	FT232B,
	FT2232C,
	FT232R,
	FT232H,
	FT2232H,
	FT4232H,
	FT4232HA,
	FT232HP,
	FT233HP,
	FT2232HP,
	FT2233HP,
	FT4232HP,
	FT4233HP,
	FTX,
};

/*
 * Driver-private state.
 * NOTE(review): presumably one instance per serial port (the port_probe
 * quirk hook below receives a pointer to it) - confirm against the
 * allocation site, which is not part of this excerpt.
 */
struct ftdi_private {
	enum ftdi_chip_type chip_type;
	int baud_base;		/* baud base clock for divisor setting */
	int custom_divisor;	/* custom_divisor kludge, this is for
				   baud_base (different from what goes to the
				   chip!) */
	u16 last_set_data_value; /* the last data state set - needed for doing
				  * a break
				  */
	int flags;		/* some ASYNC_xxxx flags are supported */
	unsigned long last_dtr_rts;	/* saved modem control outputs */
	char prev_status;        /* Used for TIOCMIWAIT */
	char transmit_empty;	/* If transmitter is empty or not */
	u16 channel;		/* channel index, or 0 for legacy types */

	speed_t force_baud;	/* if non-zero, force the baud rate to
				   this value */
	int force_rtscts;	/* if non-zero, force RTS-CTS to always
				   be enabled */

	unsigned int latency;		/* latency setting in use */
	unsigned short max_packet_size;
	struct mutex cfg_lock; /* Avoid mess by parallel calls of config ioctl() and change_speed() */
	/* The GPIO members below exist only when CONFIG_GPIOLIB is set. */
#ifdef CONFIG_GPIOLIB
	struct gpio_chip gc;
	struct mutex gpio_lock;	/* protects GPIO state */
	bool gpio_registered;	/* is the gpiochip in kernel registered */
	bool gpio_used;		/* true if the user requested a gpio */
	u8 gpio_altfunc;	/* which pins are in gpio mode */
	u8 gpio_output;		/* pin directions cache */
	u8 gpio_value;		/* pin value for outputs */
#endif
};

/*
 * Optional device-specific hooks.  Entries of the USB ID table refer to one
 * of these through .driver_info; every instance defined below fills in
 * exactly one of the two callbacks.
 */
struct ftdi_quirk {
	/* Hook taking the whole usb_serial device; returns an int status. */
	int (*probe)(struct usb_serial *);
	/* Special settings for probed ports. */
	void (*port_probe)(struct ftdi_private *);
};

/* Prototypes of the quirk callbacks referenced by the tables below. */
static int   ftdi_jtag_probe(struct usb_serial *serial);
static int   ftdi_NDI_device_setup(struct usb_serial *serial);
static int   ftdi_stmclite_probe(struct usb_serial *serial);
static int   ftdi_8u2232c_probe(struct usb_serial *serial);
static void  ftdi_USB_UIRT_setup(struct ftdi_private *priv);
static void  ftdi_HE_TIRA1_setup(struct ftdi_private *priv);

/* Quirk instances: one per special-cased device family. */
static const struct ftdi_quirk ftdi_jtag_quirk = {
	.probe	= ftdi_jtag_probe,
};

static const struct ftdi_quirk ftdi_NDI_device_quirk = {
	.probe	= ftdi_NDI_device_setup,
};

static const struct ftdi_quirk ftdi_USB_UIRT_quirk = {
	.port_probe = ftdi_USB_UIRT_setup,
};

static const struct ftdi_quirk ftdi_HE_TIRA1_quirk = {
	.port_probe = ftdi_HE_TIRA1_setup,
};

static const struct ftdi_quirk ftdi_stmclite_quirk = {
	.probe	= ftdi_stmclite_probe,
};

static const struct ftdi_quirk ftdi_8u2232c_quirk = {
	.probe	= ftdi_8u2232c_probe,
};

/*
 * The 8U232AM has the same API as the sio except for:
 * - it can support MUCH higher baudrates; up to:
 *   o 921600 for RS232 and 2000000 for RS422/485 at 48MHz
 *   o 230400 at 12MHz
 *   so .. 8U232AM's baudrate setting codes are different
 * - it has a two byte status code.
 * - it returns characters every 16ms (the FTDI does it every 40ms)
 *
 * the bcdDevice value is used to differentiate FT232BM and FT245BM from
 * the earlier FT8U232AM and FT8U232BM.  For now, include all known VID/PID
 * combinations in both tables.
 * FIXME: perhaps bcdDevice can also identify 12MHz FT8U232AM devices,
 * but I don't know if those ever went into mass production. [Ian Abbott]
 */



/*
 * Device ID not listed? Test it using
 * /sys/bus/usb-serial/drivers/ftdi_sio/new_id and send a patch or report.
*/ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_BM_ATOM_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_EV3CON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_CAT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_WKEY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USINT_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTZWAVE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IRTRANS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IPLUS2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DMX4ALL) }, { USB_DEVICE(FTDI_VID, FTDI_SIO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232RL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) , .driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk }, { USB_DEVICE(FTDI_VID, FTDI_4232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_232H_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FTX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT2232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT233HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT232HP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FT4232HA_PID) }, { 
USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RELAIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_SNIFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_THROTTLE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GATEWAY_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OPENDCC_GBM_BOOST_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_AGILIS_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_CC_PID) }, { USB_DEVICE(NEWPORT_VID, NEWPORT_CONEX_AGP_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_IOBOARD_PID) }, { USB_DEVICE(INTERBIOMETRICS_VID, INTERBIOMETRICS_MINI_IOBOARD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SPROG_II) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_LP101_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAGSYS_P200X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_LENZ_LIUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_632_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_634_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_547_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_633_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_631_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_635_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_640_PID) }, { USB_DEVICE(FTDI_VID, FTDI_XF_642_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DSS20_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_URBAN_1_PID) }, { USB_DEVICE(FTDI_NF_RIC_VID, FTDI_NF_RIC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_VNHCPCUSB_D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_5_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0103_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0104_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0105_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0106_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0107_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0108_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0109_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_010F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0110_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0111_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0112_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0113_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0114_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0115_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0116_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0117_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0118_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0119_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_011F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0120_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0121_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0122_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0123_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0124_PID) }, { 
USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0125_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0126_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0127_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0128_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0129_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_012F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0130_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0131_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0132_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0133_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0134_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0135_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0136_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0137_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0138_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0139_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_013F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0140_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0141_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0142_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0143_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0144_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0145_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0146_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0147_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_0148_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0149_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_014F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0150_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0151_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0152_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0153_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0154_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0155_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0156_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0157_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0158_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0159_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_015F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0160_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0161_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0162_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0163_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0164_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0165_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0166_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0167_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0168_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0169_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016B_PID) }, { 
USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_016F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0170_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0171_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0172_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0173_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0174_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0175_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0176_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0177_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0178_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0179_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_017F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0180_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0181_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0182_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0183_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0184_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0185_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0186_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0187_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0188_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0189_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_018E_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_018F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0190_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0191_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0192_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0193_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0194_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0195_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0196_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0197_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0198_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0199_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_019F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01A9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01AF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B2_PID) }, { 
USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01B9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01BF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01C9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01CF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D5_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_01D6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01D9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01DF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01E9_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01ED_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01EF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F0_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F1_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F3_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F4_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F5_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F6_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F7_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F8_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01F9_PID) }, { 
USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FA_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FB_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FC_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FF_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_4701_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9300_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9301_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9302_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9303_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9304_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9305_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9306_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9307_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9308_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9309_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930B_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930F_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9310_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9311_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9312_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9313_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9314_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9315_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9316_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9317_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9318_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9319_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931A_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931B_PID) }, { USB_DEVICE(MTXORB_VID, 
MTXORB_FTDI_RANGE_931C_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931D_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931E_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PERLE_ULTRAPORT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PIEGROUP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TNC_X_PID) }, { USB_DEVICE(FTDI_VID, FTDI_USBX_707_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2101_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2102_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2103_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2104_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2106_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2201_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2202_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2203_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2401_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2402_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2403_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2801_8_PID) }, { USB_DEVICE(SEALEVEL_VID, 
SEALEVEL_2802_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2802_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_4_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_5_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_6_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_7_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803_8_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_1_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_2_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_3_PID) }, { USB_DEVICE(SEALEVEL_VID, SEALEVEL_2803R_4_PID) }, { USB_DEVICE(IDTECH_VID, IDTECH_IDT1221U_PID) }, { USB_DEVICE(OCT_VID, OCT_US101_PID) }, { USB_DEVICE(OCT_VID, OCT_DK201_PID) }, { USB_DEVICE(FTDI_VID, FTDI_HE_TIRA1_PID), .driver_info = (kernel_ulong_t)&ftdi_HE_TIRA1_quirk }, { USB_DEVICE(FTDI_VID, FTDI_USB_UIRT_PID), .driver_info = (kernel_ulong_t)&ftdi_USB_UIRT_quirk }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_1) }, { USB_DEVICE(FTDI_VID, PROTEGO_R2X0) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_3) }, { USB_DEVICE(FTDI_VID, PROTEGO_SPECIAL_4) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E808_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E809_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E80F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E889_PID) }, { 
USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88A_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88B_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88D_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88E_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GUDEADS_E88F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UO100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UR100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ALC8500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PYRAMID_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1000PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_US485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PICPRO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PCMCIA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PK1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_RS232MON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_APP70_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PEDO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IBS_PROD_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TAVIR_STK500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TIAO_UMPA_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2WI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX3_PID) }, /* * ELV devices: */ { USB_DEVICE(FTDI_ELV_VID, FTDI_ELV_WS300_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_MSM1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_KL100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS550_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EC3000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS888_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TWS550_PID) }, 
{ USB_DEVICE(FTDI_VID, FTDI_ELV_FEM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CLI7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PPS7330_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFM100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDF77_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UIO88_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UAD8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UDA7_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_USI2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_T1100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCD200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_ULA200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_CSI8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1000DL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_PCK100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_RFP500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FS20SIG_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UTP8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS444PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FHZ1300PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_EM1010PC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_HS485_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_UMS100_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_1_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU20_0_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU40_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSMACHX_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSLOAD_N_GO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSICDU64_4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CCSPRIME8_5_PID) }, { USB_DEVICE(FTDI_VID, INSIDE_ACCESSO) }, { USB_DEVICE(INTREPID_VID, 
INTREPID_VALUECAN_PID) }, { USB_DEVICE(INTREPID_VID, INTREPID_NEOVI_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_TWIST_PID) }, { USB_DEVICE(FALCOM_VID, FALCOM_SAMBA_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SUUNTO_SPORTS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OCEANIC_PID) }, { USB_DEVICE(TTI_VID, TTI_QL355P_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RM_CANVIEW_PID) }, { USB_DEVICE(ACTON_VID, ACTON_SPECTRAPRO_PID) }, { USB_DEVICE(CONTEC_VID, CONTEC_COM1USBH_PID) }, { USB_DEVICE(MITSUBISHI_VID, MITSUBISHI_FXUSB_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USPTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR2_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOPTL4DR_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USB9F_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_232USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_2W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_485USBTB_4W_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL5USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_TTL3USB9M_PID) }, { USB_DEVICE(BANDB_VID, BANDB_ZZ_PROG1_USB_PID) }, { USB_DEVICE(FTDI_VID, EVER_ECO_PRO_CDS) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_4N_GALAXY_DE_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_0_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_1_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_2_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_3_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_4_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) }, { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) }, { USB_DEVICE(XSENS_VID, 
XSENS_AWINDA_STATION_PID) }, { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTIUSBCONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) }, { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_KW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_YS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y8_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_IC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_DB9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_RS232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MHAM_Y9_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_VCP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TERATRONIK_D2XX_PID) }, { USB_DEVICE(EVOLUTION_VID, EVOLUTION_ER1_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_HYBRID_PID) }, { USB_DEVICE(EVOLUTION_VID, EVO_RCM4_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ARTEMIS_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16C_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16HRC_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ATIK_ATK16IC_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_B1_PID) }, { USB_DEVICE(KOBIL_VID, KOBIL_CONV_KAAN_PID) }, { USB_DEVICE(POSIFLEX_VID, POSIFLEX_PP7000_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TTUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ECLO_COM_1WIRE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_777_PID) }, { USB_DEVICE(FTDI_VID, FTDI_WESTREX_MODEL_8900F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PCDJ_DAC2_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RRCIRKITS_LOCOBUFFER_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ASK_RDR400_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NZR_SEM_USB_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_OPC_U_UC_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C1_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2C2_PID) }, { 
USB_DEVICE(ICOM_VID, ICOM_ID_RP2D_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2VR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP4KVR_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVT_PID) }, { USB_DEVICE(ICOM_VID, ICOM_ID_RP2KVR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACG_HFDUAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_YEI_SERVOCENTER31_PID) }, { USB_DEVICE(FTDI_VID, FTDI_THORLABS_PID) }, { USB_DEVICE(TESTO_VID, TESTO_1_PID) }, { USB_DEVICE(TESTO_VID, TESTO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GAMMA_SCOUT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13M_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13S_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13U_PID) }, { USB_DEVICE(ELEKTOR_VID, ELEKTOR_FT323R_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NDI_HUC_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_SPECTRA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_2_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_FUTURE_3_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(FTDI_NDI_VID, FTDI_NDI_EMGUIDE_GEMINI_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, { USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) }, { USB_DEVICE(FTDI_VID, RTSYSTEMS_USB_VX8_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29A_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29F_PID) }, { USB_DEVICE(RTSYSTEMS_VID, 
RTSYSTEMS_USB_62B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S01_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29C_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_81B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_82B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K4Y_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5G_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S05_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_60_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_61_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63B_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_64_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_65_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92D_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_W5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_A5R_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_PW1_PID) }, { USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) }, { USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, 
LMI_LM3S_DEVEL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_EVAL_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, LMI_LM3S_ICDI_BOARD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_TURTELIZER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_USB60F) }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID_SCU18) }, { USB_DEVICE(FTDI_VID, FTDI_REU_TINY_PID) }, /* Papouch devices based on FTDI chip */ { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AP485_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB422_2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485S_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB485C_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_LEC_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SB232_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_TMU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_IRAMP_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK5_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO8x8_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO4x4_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x2_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO10x1_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO30x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO60x3_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO2x16_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_QUIDO3x32_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_DRAK6_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_UPSUSB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_MU_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_SIMUKEY_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_AD4USB_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMUX_PID) }, { USB_DEVICE(PAPOUCH_VID, PAPOUCH_GMSR_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DGQG_PID) 
}, { USB_DEVICE(FTDI_VID, FTDI_DOMINTELL_DUSB_PID) }, { USB_DEVICE(ALTI2_VID, ALTI2_N3_PID) }, { USB_DEVICE(FTDI_VID, DIEBOLD_BCS_SE923_PID) }, { USB_DEVICE(ATMEL_VID, STK541_PID) }, { USB_DEVICE(DE_VID, STB_PID) }, { USB_DEVICE(DE_VID, WHT_PID) }, { USB_DEVICE(ADI_VID, ADI_GNICE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ADI_VID, ADI_GNICEPLUS_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, USB_CLASS_VENDOR_SPEC, USB_SUBCLASS_VENDOR_SPEC, 0x00) }, { USB_DEVICE_INTERFACE_NUMBER(ACTEL_VID, MICROSEMI_ARROW_SF2PLUS_BOARD_PID, 2) }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(LARSENBRUSGAARD_VID, LB_ALTITRACK_PID) }, { USB_DEVICE(GN_OTOMETRICS_VID, AURICAL_USB_PID) }, { USB_DEVICE(FTDI_VID, PI_C865_PID) }, { USB_DEVICE(FTDI_VID, PI_C857_PID) }, { USB_DEVICE(PI_VID, PI_C866_PID) }, { USB_DEVICE(PI_VID, PI_C663_PID) }, { USB_DEVICE(PI_VID, PI_C725_PID) }, { USB_DEVICE(PI_VID, PI_E517_PID) }, { USB_DEVICE(PI_VID, PI_C863_PID) }, { USB_DEVICE(PI_VID, PI_E861_PID) }, { USB_DEVICE(PI_VID, PI_C867_PID) }, { USB_DEVICE(PI_VID, PI_E609_PID) }, { USB_DEVICE(PI_VID, PI_E709_PID) }, { USB_DEVICE(PI_VID, PI_100F_PID) }, { USB_DEVICE(PI_VID, PI_1011_PID) }, { USB_DEVICE(PI_VID, PI_1012_PID) }, { USB_DEVICE(PI_VID, PI_1013_PID) }, { USB_DEVICE(PI_VID, PI_1014_PID) }, { USB_DEVICE(PI_VID, PI_1015_PID) }, { USB_DEVICE(PI_VID, PI_1016_PID) }, { USB_DEVICE(KONDO_VID, KONDO_USB_SERIAL_PID) }, { USB_DEVICE(BAYER_VID, BAYER_CONTOUR_CABLE_PID) }, { USB_DEVICE(FTDI_VID, MARVELL_OPENRD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, TI_XDS100V2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, HAMEG_HO820_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO720_PID) }, { USB_DEVICE(FTDI_VID, HAMEG_HO730_PID) }, { 
USB_DEVICE(FTDI_VID, HAMEG_HO870_PID) }, { USB_DEVICE(FTDI_VID, MJSG_GENERIC_PID) }, { USB_DEVICE(FTDI_VID, MJSG_SR_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_HD_RADIO_PID) }, { USB_DEVICE(FTDI_VID, MJSG_XM_RADIO_PID) }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_ST_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SLITE_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH2_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH4_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) }, { USB_DEVICE(FTDI_VID, ACCESIO_COM4SM_PID) }, { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LOGBOOKML_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_LS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CINTERION_MC55I_PID) }, { USB_DEVICE(FTDI_VID, FTDI_FHE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) }, { USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_2232_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(ST_VID, ST_STMCLT_4232_PID), .driver_info = (kernel_ulong_t)&ftdi_stmclite_quirk }, { USB_DEVICE(FTDI_VID, FTDI_RF_R106) }, { USB_DEVICE(FTDI_VID, FTDI_DISTORTEC_JTAG_LOCK_PICK_PID), .driver_info = 
(kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, /* Cressi Devices */ { USB_DEVICE(FTDI_VID, FTDI_CRESSI_PID) }, /* Brainboxes Devices */ { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_001_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_012_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_5_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_357_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, 
BRAINBOXES_US_701_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_4_PID) }, /* ekey Devices */ { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) }, /* Infineon Devices */ { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC1798_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC2X7_PID, 1) }, /* GE Healthcare devices */ { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, /* Active Research (Actisense) devices */ { USB_DEVICE(FTDI_VID, ACTISENSE_NDC_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_UID_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_USA_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGX_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_NMEA2000_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ETHERNET_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_WIFI_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, /* Belimo Automation devices */ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) }, { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) }, /* ICP DAS I-756xU devices */ { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, 
AIRBUS_DS_P8GR) },
	/* EZPrototypes devices */
	{ USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) },
	{ USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) },
	/* Sienna devices */
	{ USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) },
	{ USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) },
	/* IDS GmbH devices */
	{ USB_DEVICE(IDS_VID, IDS_SI31A_PID) },
	{ USB_DEVICE(IDS_VID, IDS_CM31A_PID) },
	/* Omron devices */
	{ USB_DEVICE(OMRON_VID, OMRON_CS1W_CIF31_PID) },
	/* U-Blox devices */
	{ USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) },
	{ USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) },
	/* FreeCalypso USB adapters */
	{ USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_BUF_PID),
		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
	{ USB_DEVICE(FTDI_VID, FTDI_FALCONIA_JTAG_UNBUF_PID),
		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
	/* GMC devices */
	{ USB_DEVICE(GMC_VID, GMC_Z216C_PID) },
	/* Altera USB Blaster 3 */
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6022_PID, 1) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6025_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6026_PID, 3) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_6029_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602A_PID, 3) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602C_PID, 1) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 1) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602D_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) },
	{ USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) },
	/* Abacus Electrics */
	{ USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) },
	{ }					/* Terminating entry */
};

MODULE_DEVICE_TABLE(usb, id_table_combined);

/* Printable chip names, indexed by chip type; used in the driver's log messages. */
static const char *ftdi_chip_name[] = {
	[SIO] =
"SIO",	/* the serial part of FT8U100AX */
	[FT232A] = "FT232A",
	[FT232B] = "FT232B",
	[FT2232C] = "FT2232C/D",
	[FT232R] = "FT232R",
	[FT232H] = "FT232H",
	[FT2232H] = "FT2232H",
	[FT4232H] = "FT4232H",
	[FT4232HA] = "FT4232HA",
	[FT232HP] = "FT232HP",
	[FT233HP] = "FT233HP",
	[FT2232HP] = "FT2232HP",
	[FT2233HP] = "FT2233HP",
	[FT4232HP] = "FT4232HP",
	[FT4233HP] = "FT4233HP",
	[FTX] = "FT-X",
};

/* Used for TIOCMIWAIT */
#define FTDI_STATUS_B0_MASK	(FTDI_RS0_CTS | FTDI_RS0_DSR | FTDI_RS0_RI | FTDI_RS0_RLSD)
#define FTDI_STATUS_B1_MASK	(FTDI_RS_BI)
/* End TIOCMIWAIT */

/* Forward declarations. */
static void ftdi_set_termios(struct tty_struct *tty,
			     struct usb_serial_port *port,
			     const struct ktermios *old_termios);
static int ftdi_get_modem_status(struct usb_serial_port *port,
				 unsigned char status[2]);

#define WDR_TIMEOUT 5000 /* default urb timeout */
#define WDR_SHORT_TIMEOUT 1000	/* shorter urb timeout */

/*
 * ***************************************************************************
 * Utility functions
 * ***************************************************************************
 */

/*
 * Convert a baud rate into the 16-bit divisor word used on the FT232A path.
 *
 * base / (2 * baud), rounded to nearest, is the divisor expressed in eighths.
 * The integer part lands in the low bits and the fraction is encoded in the
 * top two bits: 1/8 -> 0xc000, 2/8..3/8 -> 0x8000, 4/8..6/8 -> 0x4000, while
 * 7/8 is rounded up to the next integer.  A result of exactly 1.0 is encoded
 * as 0.
 *
 * NOTE(review): baud must be non-zero here; get_ftdi_divisor() replaces a
 * zero rate with 9600 before calling.
 */
static unsigned short int ftdi_232am_baud_base_to_divisor(int baud, int base)
{
	unsigned short int divisor;
	/* divisor shifted 3 bits to the left */
	int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud);

	if ((divisor3 & 0x7) == 7)
		divisor3++; /* round x.7/8 up to x+1 */
	divisor = divisor3 >> 3;
	divisor3 &= 0x7;
	if (divisor3 == 1)
		divisor |= 0xc000;	/* +0.125 */
	else if (divisor3 >= 4)
		divisor |= 0x4000;	/* +0.5 */
	else if (divisor3 != 0)
		divisor |= 0x8000;	/* +0.25 */
	else if (divisor == 1)
		divisor = 0;		/* special case for maximum baud rate */
	return divisor;
}

/* Same conversion with the base fixed at 48000000. */
static unsigned short int ftdi_232am_baud_to_divisor(int baud)
{
	 return ftdi_232am_baud_base_to_divisor(baud, 48000000);
}

/*
 * Convert a baud rate into a 32-bit divisor word: the integer part of the
 * divisor goes in the low bits and a 3-bit fraction code, looked up in
 * divfrac[], is placed from bit 14 upwards.
 */
static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base)
{
	static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 };
	u32 divisor;
	/* divisor shifted 3 bits to the left */
	int divisor3 = DIV_ROUND_CLOSEST(base, 2 * baud);

	divisor =
divisor3 >> 3;
	/* encode the fractional eighths via the lookup table */
	divisor |= (u32)divfrac[divisor3 & 0x7] << 14;
	/* Deal with special cases for highest baud rates. */
	if (divisor == 1)		/* 1.0 */
		divisor = 0;
	else if (divisor == 0x4001)	/* 1.5 */
		divisor = 1;
	return divisor;
}

/* Same conversion with the base fixed at 48000000. */
static u32 ftdi_232bm_baud_to_divisor(int baud)
{
	 return ftdi_232bm_baud_base_to_divisor(baud, 48000000);
}

/*
 * Divisor word for the Hi-Speed chips.  Uses the same integer/fraction
 * encoding as ftdi_232bm_baud_base_to_divisor() but with 10x sampling, and
 * always sets bit 17 (0x00020000) in the result.
 */
static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base)
{
	static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 };
	u32 divisor;
	int divisor3;

	/* hi-speed baud rate is 10-bit sampling instead of 16-bit */
	divisor3 = DIV_ROUND_CLOSEST(8 * base, 10 * baud);

	divisor = divisor3 >> 3;
	divisor |= (u32)divfrac[divisor3 & 0x7] << 14;
	/* Deal with special cases for highest baud rates. */
	if (divisor == 1)		/* 1.0 */
		divisor = 0;
	else if (divisor == 0x4001)	/* 1.5 */
		divisor = 1;
	/*
	 * Set this bit to turn off a divide by 2.5 on baud rate generator
	 * This enables baud rates up to 12Mbaud but cannot reach below 1200
	 * baud with this bit set
	 */
	divisor |= 0x00020000;
	return divisor;
}

/* Same conversion with the base fixed at 120000000. */
static u32 ftdi_2232h_baud_to_divisor(int baud)
{
	 return ftdi_2232h_baud_base_to_divisor(baud, 120000000);
}

/* Convenience wrappers around update_mctrl() that only set or only clear lines. */
#define set_mctrl(port, set)		update_mctrl((port), (set), 0)
#define clear_mctrl(port, clear)	update_mctrl((port), 0, (clear))

/*
 * Raise and/or drop DTR and RTS with a single SET_MODEM_CTRL control request.
 *
 * @set and @clear are TIOCM_* masks; only TIOCM_DTR and TIOCM_RTS are acted
 * on, and a bit present in both is treated as "set".  Returns 0 without
 * talking to the device when neither line is affected; otherwise returns the
 * usb_control_msg() result, with failures passed through
 * usb_translate_errors().
 */
static int update_mctrl(struct usb_serial_port *port, unsigned int set,
							unsigned int clear)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct device *dev = &port->dev;
	unsigned value;
	int rv;

	if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) {
		dev_dbg(dev, "%s - DTR|RTS not being set|cleared\n", __func__);
		return 0;	/* no change */
	}

	clear &= ~set;	/* 'set' takes precedence over 'clear' */
	value = 0;
	if (clear & TIOCM_DTR)
		value |= FTDI_SIO_SET_DTR_LOW;
	if (clear & TIOCM_RTS)
		value |= FTDI_SIO_SET_RTS_LOW;
	if (set & TIOCM_DTR)
		value |= FTDI_SIO_SET_DTR_HIGH;
	if (set & TIOCM_RTS)
		value |= FTDI_SIO_SET_RTS_HIGH;
	rv = usb_control_msg(port->serial->dev,
			       usb_sndctrlpipe(port->serial->dev, 0),
FTDI_SIO_SET_MODEM_CTRL_REQUEST,
			       FTDI_SIO_SET_MODEM_CTRL_REQUEST_TYPE,
			       value, priv->channel,
			       NULL, 0, WDR_TIMEOUT);
	if (rv < 0) {
		dev_dbg(dev, "%s Error from MODEM_CTRL urb: DTR %s, RTS %s\n",
			__func__,
			(set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged",
			(set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? "LOW" : "unchanged");
		rv = usb_translate_errors(rv);
	} else {
		dev_dbg(dev, "%s - DTR %s, RTS %s\n", __func__,
			(set & TIOCM_DTR) ? "HIGH" : (clear & TIOCM_DTR) ? "LOW" : "unchanged",
			(set & TIOCM_RTS) ? "HIGH" : (clear & TIOCM_RTS) ? "LOW" : "unchanged");
		/* FIXME: locking on last_dtr_rts */
		/* remember the line state that was just requested */
		priv->last_dtr_rts = (priv->last_dtr_rts & ~clear) | set;
	}
	return rv;
}

/*
 * Work out the divisor word for the tty's current baud rate.
 *
 * The conversion used depends on priv->chip_type.  Rates that the chip
 * cannot do fall back to 9600, and the rate finally chosen is reported back
 * to the tty layer with tty_encode_baud_rate().  Returns the divisor word.
 */
static u32 get_ftdi_divisor(struct tty_struct *tty,
						struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct device *dev = &port->dev;
	u32 div_value = 0;
	int div_okay = 1;
	int baud;

	baud = tty_get_baud_rate(tty);
	dev_dbg(dev, "%s - tty_get_baud_rate reports speed %d\n", __func__, baud);

	/*
	 * Observe deprecated async-compatible custom_divisor hack, update
	 * baudrate if needed.
*/
	if (baud == 38400 &&
	    ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) &&
	     (priv->custom_divisor)) {
		baud = priv->baud_base / priv->custom_divisor;
		dev_dbg(dev, "%s - custom divisor %d sets baud rate to %d\n",
			__func__, priv->custom_divisor, baud);
	}

	/* a zero rate (B0 or a too-large custom divisor) is treated as 9600 */
	if (!baud)
		baud = 9600;
	switch (priv->chip_type) {
	case SIO:
		/* the SIO only knows a fixed table of rates */
		switch (baud) {
		case 300: div_value = ftdi_sio_b300; break;
		case 600: div_value = ftdi_sio_b600; break;
		case 1200: div_value = ftdi_sio_b1200; break;
		case 2400: div_value = ftdi_sio_b2400; break;
		case 4800: div_value = ftdi_sio_b4800; break;
		case 9600: div_value = ftdi_sio_b9600; break;
		case 19200: div_value = ftdi_sio_b19200; break;
		case 38400: div_value = ftdi_sio_b38400; break;
		case 57600: div_value = ftdi_sio_b57600;  break;
		case 115200: div_value = ftdi_sio_b115200; break;
		default:
			dev_dbg(dev, "%s - Baudrate (%d) requested is not supported\n",
				__func__,  baud);
			div_value = ftdi_sio_b9600;
			baud = 9600;
			div_okay = 0;
		}
		break;
	case FT232A:
		if (baud <= 3000000) {
			div_value = ftdi_232am_baud_to_divisor(baud);
		} else {
			dev_dbg(dev, "%s - Baud rate too high!\n", __func__);
			baud = 9600;
			div_value = ftdi_232am_baud_to_divisor(9600);
			div_okay = 0;
		}
		break;
	case FT232B:
	case FT2232C:
	case FT232R:
	case FTX:
		if (baud <= 3000000) {
			u16 product_id = le16_to_cpu(
				port->serial->dev->descriptor.idProduct);
			/* the listed NDI products get 1200000 when 19200 is requested */
			if (((product_id == FTDI_NDI_HUC_PID)		||
			     (product_id == FTDI_NDI_SPECTRA_SCU_PID)	||
			     (product_id == FTDI_NDI_FUTURE_2_PID)	||
			     (product_id == FTDI_NDI_FUTURE_3_PID)	||
			     (product_id == FTDI_NDI_AURORA_SCU_PID))	&&
			    (baud == 19200)) {
				baud = 1200000;
			}
			div_value = ftdi_232bm_baud_to_divisor(baud);
		} else {
			dev_dbg(dev, "%s - Baud rate too high!\n", __func__);
			div_value = ftdi_232bm_baud_to_divisor(9600);
			div_okay = 0;
			baud = 9600;
		}
		break;
	default:
		/* all remaining chip types: 2232h divisor from 1200 up, 232bm divisor below */
		if ((baud <= 12000000) && (baud >= 1200)) {
			div_value = ftdi_2232h_baud_to_divisor(baud);
		} else if (baud < 1200) {
			div_value = ftdi_232bm_baud_to_divisor(baud);
		} else {
			dev_dbg(dev, "%s - Baud rate too high!\n", __func__);
div_value = ftdi_232bm_baud_to_divisor(9600);
			div_okay = 0;
			baud = 9600;
		}
		break;
	}

	if (div_okay) {
		dev_dbg(dev, "%s - Baud rate set to %d (divisor 0x%lX) on chip %s\n",
			__func__, baud, (unsigned long)div_value,
			ftdi_chip_name[priv->chip_type]);
	}

	/* tell the tty layer which rate was actually selected */
	tty_encode_baud_rate(tty, baud, baud);
	return div_value;
}

/*
 * Send the divisor from get_ftdi_divisor() to the device in a SET_BAUDRATE
 * control request.  The low 16 bits travel as the request value and the
 * upper bits as the index; when the port has a non-zero channel the index is
 * shifted up by 8 and combined with the channel.  Returns the
 * usb_control_msg() result.
 */
static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	u16 value;
	u16 index;
	u32 index_value;
	int rv;

	index_value = get_ftdi_divisor(tty, port);
	value = (u16)index_value;
	index = (u16)(index_value >> 16);
	if (priv->channel)
		index = (u16)((index << 8) | priv->channel);

	rv = usb_control_msg(port->serial->dev,
			    usb_sndctrlpipe(port->serial->dev, 0),
			    FTDI_SIO_SET_BAUDRATE_REQUEST,
			    FTDI_SIO_SET_BAUDRATE_REQUEST_TYPE,
			    value, index,
			    NULL, 0, WDR_SHORT_TIMEOUT);
	return rv;
}

/*
 * Write the latency timer to the device.  The value sent is priv->latency,
 * or 1 when ASYNC_LOW_LATENCY is set in priv->flags.  Returns -EINVAL for
 * SIO and FT232A chips, otherwise the usb_control_msg() result.
 */
static int write_latency_timer(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_device *udev = port->serial->dev;
	int rv;
	int l = priv->latency;

	if (priv->chip_type == SIO || priv->chip_type == FT232A)
		return -EINVAL;

	if (priv->flags & ASYNC_LOW_LATENCY)
		l = 1;

	dev_dbg(&port->dev, "%s: setting latency timer = %i\n", __func__, l);

	rv = usb_control_msg(udev,
			     usb_sndctrlpipe(udev, 0),
			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
			     FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
			     l, priv->channel,
			     NULL, 0, WDR_TIMEOUT);
	if (rv < 0)
		dev_err(&port->dev, "Unable to write latency timer: %i\n", rv);
	return rv;
}

/*
 * Read the one-byte latency timer from the device, with no chip-type check
 * and no logging.  Returns the byte read when usb_control_msg_recv() returns
 * 0, otherwise that function's return value.
 */
static int _read_latency_timer(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_device *udev = port->serial->dev;
	u8 buf;
	int rv;

	rv = usb_control_msg_recv(udev, 0, FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
				  FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, 0,
				  priv->channel, &buf, 1, WDR_TIMEOUT,
				  GFP_KERNEL);
	if (rv == 0)
		rv = buf;

	return rv;
}

/*
 * Read the latency timer into priv->latency.  Returns 0 on success, -EINVAL
 * for SIO and FT232A chips, or the negative value from _read_latency_timer().
 */
static int read_latency_timer(struct usb_serial_port *port)
{
	struct ftdi_private *priv =
usb_get_serial_port_data(port);
	int rv;

	if (priv->chip_type == SIO || priv->chip_type == FT232A)
		return -EINVAL;

	rv = _read_latency_timer(port);
	if (rv < 0) {
		dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
		return rv;
	}

	priv->latency = rv;

	return 0;
}

/* Copy flags, baud_base and custom_divisor into @ss under cfg_lock. */
static void get_serial_info(struct tty_struct *tty, struct serial_struct *ss)
{
	struct usb_serial_port *port = tty->driver_data;
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	mutex_lock(&priv->cfg_lock);
	ss->flags = priv->flags;
	ss->baud_base = priv->baud_base;
	ss->custom_divisor = priv->custom_divisor;
	mutex_unlock(&priv->cfg_lock);
}

/*
 * Apply the flags and custom divisor from @ss, under cfg_lock.
 *
 * Callers without CAP_SYS_ADMIN may only change bits inside ASYNC_USR_MASK;
 * anything else yields -EPERM.  The latency timer is rewritten after every
 * accepted change, and the baud rate is reprogrammed when the SPD bits
 * changed or the custom divisor changed while ASYNC_SPD_CUST is selected.
 * The results of those two device writes are not propagated; returns 0.
 */
static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss)
{
	struct usb_serial_port *port = tty->driver_data;
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int old_flags, old_divisor;

	mutex_lock(&priv->cfg_lock);

	if (!capable(CAP_SYS_ADMIN)) {
		if ((ss->flags ^ priv->flags) & ~ASYNC_USR_MASK) {
			mutex_unlock(&priv->cfg_lock);
			return -EPERM;
		}
	}

	old_flags = priv->flags;
	old_divisor = priv->custom_divisor;

	priv->flags = ss->flags & ASYNC_FLAGS;
	priv->custom_divisor = ss->custom_divisor;

	write_latency_timer(port);

	if ((priv->flags ^ old_flags) & ASYNC_SPD_MASK ||
			((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST &&
			 priv->custom_divisor != old_divisor)) {

		/* warn about deprecation unless clearing */
		if (priv->flags & ASYNC_SPD_MASK)
			dev_warn_ratelimited(&port->dev, "use of SPD flags is deprecated\n");

		change_speed(tty, port);
	}
	mutex_unlock(&priv->cfg_lock);
	return 0;
}

/*
 * Copy the line status to user space: TIOCSER_TEMT when priv->transmit_empty
 * is set, 0 otherwise.  Returns -EFAULT if the copy fails.
 */
static int get_lsr_info(struct usb_serial_port *port,
			unsigned int __user *retinfo)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	unsigned int result = 0;

	if (priv->transmit_empty)
		result = TIOCSER_TEMT;

	if (copy_to_user(retinfo, &result, sizeof(unsigned int)))
		return -EFAULT;
	return 0;
}

/*
 * Derive chip_type, baud_base and channel from the device's bcdDevice value
 * and the interface number.  Returns 0 on success or -ENODEV for an
 * unrecognised bcdDevice.
 */
static int ftdi_determine_type(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_serial *serial = port->serial;
struct usb_device *udev = serial->dev;
	unsigned int version, ifnum;

	version = le16_to_cpu(udev->descriptor.bcdDevice);
	ifnum = serial->interface->cur_altsetting->desc.bInterfaceNumber;

	/* Assume Hi-Speed type */
	priv->baud_base = 120000000 / 2;
	priv->channel = CHANNEL_A + ifnum;

	/* the cases below override baud_base and channel where the defaults do not apply */
	switch (version) {
	case 0x200:
		priv->chip_type = FT232A;
		priv->baud_base = 48000000 / 2;
		priv->channel = 0;
		/*
		 * FT232B devices have a bug where bcdDevice gets set to 0x200
		 * when iSerialNumber is 0. Assume it is an FT232B in case the
		 * latency timer is readable.
		 */
		if (udev->descriptor.iSerialNumber == 0 &&
				_read_latency_timer(port) >= 0) {
			priv->chip_type = FT232B;
		}
		break;
	case 0x400:
		priv->chip_type = FT232B;
		priv->baud_base = 48000000 / 2;
		priv->channel = 0;
		break;
	case 0x500:
		priv->chip_type = FT2232C;
		priv->baud_base = 48000000 / 2;
		break;
	case 0x600:
		priv->chip_type = FT232R;
		priv->baud_base = 48000000 / 2;
		priv->channel = 0;
		break;
	case 0x700:
		priv->chip_type = FT2232H;
		break;
	case 0x800:
		priv->chip_type = FT4232H;
		break;
	case 0x900:
		priv->chip_type = FT232H;
		break;
	case 0x1000:
		priv->chip_type = FTX;
		priv->baud_base = 48000000 / 2;
		break;
	case 0x2800:
		priv->chip_type = FT2233HP;
		break;
	case 0x2900:
		priv->chip_type = FT4233HP;
		break;
	case 0x3000:
		priv->chip_type = FT2232HP;
		break;
	case 0x3100:
		priv->chip_type = FT4232HP;
		break;
	case 0x3200:
		priv->chip_type = FT233HP;
		break;
	case 0x3300:
		priv->chip_type = FT232HP;
		break;
	case 0x3600:
		priv->chip_type = FT4232HA;
		break;
	default:
		/* anything below 0x200 is treated as the original SIO */
		if (version < 0x200) {
			priv->chip_type = SIO;
			priv->baud_base = 12000000 / 16;
			priv->channel = 0;
		} else {
			dev_err(&port->dev, "unknown device type: 0x%02x\n", version);
			return -ENODEV;
		}
	}

	dev_info(&udev->dev, "Detected %s\n", ftdi_chip_name[priv->chip_type]);

	return 0;
}


/*
 * Determine the maximum packet size for the device. This depends on the chip
 * type and the USB host capabilities. The value should be obtained from the
 * device descriptor as the chip will use the appropriate values for the host.
*/ static void ftdi_set_max_packet_size(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_interface *interface = port->serial->interface; struct usb_endpoint_descriptor *ep_desc; unsigned num_endpoints; unsigned i; num_endpoints = interface->cur_altsetting->desc.bNumEndpoints; if (!num_endpoints) return; /* * NOTE: Some customers have programmed FT232R/FT245R devices * with an endpoint size of 0 - not good. In this case, we * want to override the endpoint descriptor setting and use a * value of 64 for wMaxPacketSize. */ for (i = 0; i < num_endpoints; i++) { ep_desc = &interface->cur_altsetting->endpoint[i].desc; if (!ep_desc->wMaxPacketSize) { ep_desc->wMaxPacketSize = cpu_to_le16(0x40); dev_warn(&port->dev, "Overriding wMaxPacketSize on endpoint %d\n", usb_endpoint_num(ep_desc)); } } /* Set max packet size based on last descriptor. */ priv->max_packet_size = usb_endpoint_maxp(ep_desc); } /* * *************************************************************************** * Sysfs Attribute * *************************************************************************** */ static ssize_t latency_timer_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); if (priv->flags & ASYNC_LOW_LATENCY) return sprintf(buf, "1\n"); else return sprintf(buf, "%u\n", priv->latency); } /* Write a new value of the latency timer, in units of milliseconds. 
*/ static ssize_t latency_timer_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); u8 v; int rv; if (kstrtou8(valbuf, 10, &v)) return -EINVAL; priv->latency = v; rv = write_latency_timer(port); if (rv < 0) return -EIO; return count; } static DEVICE_ATTR_RW(latency_timer); /* Write an event character directly to the FTDI register. The ASCII value is in the low 8 bits, with the enable bit in the 9th bit. */ static ssize_t event_char_store(struct device *dev, struct device_attribute *attr, const char *valbuf, size_t count) { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; unsigned int v; int rv; if (kstrtouint(valbuf, 0, &v) || v >= 0x200) return -EINVAL; dev_dbg(&port->dev, "%s: setting event char = 0x%03x\n", __func__, v); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_EVENT_CHAR_REQUEST, FTDI_SIO_SET_EVENT_CHAR_REQUEST_TYPE, v, priv->channel, NULL, 0, WDR_TIMEOUT); if (rv < 0) { dev_dbg(&port->dev, "Unable to write event character: %i\n", rv); return -EIO; } return count; } static DEVICE_ATTR_WO(event_char); static struct attribute *ftdi_attrs[] = { &dev_attr_event_char.attr, &dev_attr_latency_timer.attr, NULL }; static umode_t ftdi_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct device *dev = kobj_to_dev(kobj); struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); enum ftdi_chip_type type = priv->chip_type; if (attr == &dev_attr_event_char.attr) { if (type == SIO) return 0; } if (attr == &dev_attr_latency_timer.attr) { if (type == SIO || type == FT232A) return 0; } return attr->mode; } static const struct attribute_group ftdi_group = { .attrs = ftdi_attrs, .is_visible = ftdi_is_visible, }; 
/* NULL-terminated list of attribute groups for the port device. */
static const struct attribute_group *ftdi_groups[] = {
	&ftdi_group,
	NULL
};

#ifdef CONFIG_GPIOLIB

/*
 * Send a SET_BITMODE request.  The request value packs @mode into the high
 * byte, priv->gpio_output into bits 4-7 and priv->gpio_value into the low
 * bits.  The interface is held awake (autopm get/put) around the transfer.
 * Returns the autopm error or the usb_control_msg() result.
 */
static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_serial *serial = port->serial;
	int result;
	u16 val;

	result = usb_autopm_get_interface(serial->interface);
	if (result)
		return result;

	val = (mode << 8) | (priv->gpio_output << 4) | priv->gpio_value;
	result = usb_control_msg(serial->dev,
				 usb_sndctrlpipe(serial->dev, 0),
				 FTDI_SIO_SET_BITMODE_REQUEST,
				 FTDI_SIO_SET_BITMODE_REQUEST_TYPE, val,
				 priv->channel, NULL, 0, WDR_TIMEOUT);
	if (result < 0) {
		dev_err(&serial->interface->dev,
			"bitmode request failed for value 0x%04x: %d\n",
			val, result);
	}

	usb_autopm_put_interface(serial->interface);

	return result;
}

/* Push the cached gpio_output/gpio_value state to the device in CBUS bitmode. */
static int ftdi_set_cbus_pins(struct usb_serial_port *port)
{
	return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_CBUS);
}

/* Clear the cached pin state and issue a bitmode reset. */
static int ftdi_exit_cbus_mode(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	priv->gpio_output = 0;
	priv->gpio_value = 0;
	return ftdi_set_bitmode(port, FTDI_SIO_BITMODE_RESET);
}

/*
 * gpio_chip request callback.  On the first request (gpio_used not yet set)
 * the cached direction and value are zeroed and written to the device;
 * gpio_used is only set if that write succeeds.  Serialised by gpio_lock.
 */
static int ftdi_gpio_request(struct gpio_chip *gc, unsigned int offset)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int result;

	mutex_lock(&priv->gpio_lock);
	if (!priv->gpio_used) {
		/* Set default pin states, as we cannot get them from device */
		priv->gpio_output = 0x00;
		priv->gpio_value = 0x00;
		result = ftdi_set_cbus_pins(port);
		if (result) {
			mutex_unlock(&priv->gpio_lock);
			return result;
		}

		priv->gpio_used = true;
	}
	mutex_unlock(&priv->gpio_lock);

	return 0;
}

/*
 * Read the pin state byte from the device with a READ_PINS request.
 * Returns the byte read when usb_control_msg_recv() returns 0, otherwise the
 * error from autopm or from the transfer.
 */
static int ftdi_read_cbus_pins(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_serial *serial = port->serial;
	u8 buf;
	int result;

	result = usb_autopm_get_interface(serial->interface);
	if (result)
		return result;

	result = usb_control_msg_recv(serial->dev, 0,
FTDI_SIO_READ_PINS_REQUEST,
				      FTDI_SIO_READ_PINS_REQUEST_TYPE, 0,
				      priv->channel, &buf, 1, WDR_TIMEOUT,
				      GFP_KERNEL);
	if (result == 0)
		result = buf;

	usb_autopm_put_interface(serial->interface);

	return result;
}

/* gpio_chip get callback: returns 0 or 1 for pin @gpio, or a negative error. */
static int ftdi_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	int result;

	result = ftdi_read_cbus_pins(port);
	if (result < 0)
		return result;

	return !!(result & BIT(gpio));
}

/*
 * gpio_chip set callback: update the cached value bit for @gpio and write
 * the whole cached state to the device, under gpio_lock.
 */
static int ftdi_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int result;

	mutex_lock(&priv->gpio_lock);

	if (value)
		priv->gpio_value |= BIT(gpio);
	else
		priv->gpio_value &= ~BIT(gpio);

	result = ftdi_set_cbus_pins(port);

	mutex_unlock(&priv->gpio_lock);

	return result;
}

/* gpio_chip get_multiple callback: one device read, masked with *mask into *bits. */
static int ftdi_gpio_get_multiple(struct gpio_chip *gc, unsigned long *mask,
					unsigned long *bits)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	int result;

	result = ftdi_read_cbus_pins(port);
	if (result < 0)
		return result;

	*bits = result & *mask;

	return 0;
}

/*
 * gpio_chip set_multiple callback: replace the cached value bits selected by
 * *mask with those from *bits, then write the state out, under gpio_lock.
 */
static int ftdi_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
					unsigned long *bits)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int result;

	mutex_lock(&priv->gpio_lock);

	priv->gpio_value &= ~(*mask);
	priv->gpio_value |= *bits & *mask;
	result = ftdi_set_cbus_pins(port);

	mutex_unlock(&priv->gpio_lock);

	return result;
}

/*
 * gpio_chip get_direction callback, answered from the cached gpio_output
 * mask: returns 0 when the pin's output bit is set and 1 otherwise.
 */
static int ftdi_gpio_direction_get(struct gpio_chip *gc, unsigned int gpio)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	return !(priv->gpio_output & BIT(gpio));
}

/* gpio_chip direction_input callback: clear the pin's output bit and write the state. */
static int ftdi_gpio_direction_input(struct gpio_chip *gc, unsigned int gpio)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int result;

	mutex_lock(&priv->gpio_lock);
priv->gpio_output &= ~BIT(gpio);
	result = ftdi_set_cbus_pins(port);

	mutex_unlock(&priv->gpio_lock);

	return result;
}

/*
 * gpio_chip direction_output callback: mark the pin as output, set its
 * cached value to @value and write the state to the device, under gpio_lock.
 */
static int ftdi_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio,
					int value)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	int result;

	mutex_lock(&priv->gpio_lock);

	priv->gpio_output |= BIT(gpio);
	if (value)
		priv->gpio_value |= BIT(gpio);
	else
		priv->gpio_value &= ~BIT(gpio);

	result = ftdi_set_cbus_pins(port);

	mutex_unlock(&priv->gpio_lock);

	return result;
}

/*
 * gpio_chip init_valid_mask callback: a pin is valid for GPIO use exactly
 * when its bit is clear in priv->gpio_altfunc.
 */
static int ftdi_gpio_init_valid_mask(struct gpio_chip *gc,
				     unsigned long *valid_mask,
				     unsigned int ngpios)
{
	struct usb_serial_port *port = gpiochip_get_data(gc);
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	unsigned long map = priv->gpio_altfunc;

	bitmap_complement(valid_mask, &map, ngpios);

	if (bitmap_empty(valid_mask, ngpios))
		dev_dbg(&port->dev, "no CBUS pin configured for GPIO\n");
	else
		dev_dbg(&port->dev, "CBUS%*pbl configured for GPIO\n", ngpios,
			valid_mask);

	return 0;
}

/*
 * Read @nbytes from the EEPROM starting at byte address @addr into @dst.
 * Both @addr and @nbytes must be even (-EINVAL otherwise).  Each control
 * transfer fetches two bytes; a short transfer yields -EIO and a transfer
 * error is returned unchanged.  Returns 0 on success.
 */
static int ftdi_read_eeprom(struct usb_serial *serial, void *dst, u16 addr,
				u16 nbytes)
{
	int read = 0;

	if (addr % 2 != 0)
		return -EINVAL;
	if (nbytes % 2 != 0)
		return -EINVAL;

	/* Read EEPROM two bytes at a time */
	while (read < nbytes) {
		int rv;

		/* the request index is a 16-bit word address, hence the division by 2 */
		rv = usb_control_msg(serial->dev,
				     usb_rcvctrlpipe(serial->dev, 0),
				     FTDI_SIO_READ_EEPROM_REQUEST,
				     FTDI_SIO_READ_EEPROM_REQUEST_TYPE,
				     0, (addr + read) / 2, dst + read, 2,
				     WDR_TIMEOUT);
		if (rv < 2) {
			if (rv >= 0)
				return -EIO;
			else
				return rv;
		}

		read += rv;
	}

	return 0;
}

/*
 * Read the FT232H CBUS configuration from the EEPROM and record in
 * priv->gpio_altfunc which of the four pins are not configured as GPIO.
 */
static int ftdi_gpio_init_ft232h(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	u16 cbus_config;
	u8 *buf;
	int ret;
	int i;

	buf = kmalloc(4, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = ftdi_read_eeprom(port->serial, buf, 0x1a, 4);
	if (ret < 0)
		goto out_free;

	/*
	 * FT232H CBUS Memory Map
	 *
	 * 0x1a: X- (upper nibble -> AC5)
	 * 0x1b: -X (lower nibble -> AC6)
	 * 0x1c: XX (upper nibble ->
AC9 | lower nibble -> AC8)
	 */
	/* pack the four configuration nibbles into one word, lowest nibble first */
	cbus_config = buf[2] << 8 | (buf[1] & 0xf) << 4 | (buf[0] & 0xf0) >> 4;

	priv->gc.ngpio = 4;
	/* start with every pin marked as alternate function, then clear the GPIO ones */
	priv->gpio_altfunc = 0xff;

	for (i = 0; i < priv->gc.ngpio; ++i) {
		if ((cbus_config & 0xf) == FTDI_FTX_CBUS_MUX_GPIO)
			priv->gpio_altfunc &= ~BIT(i);
		cbus_config >>= 4;
	}

out_free:
	kfree(buf);

	return ret;
}

/*
 * Read the FT232R CBUS configuration word (two bytes at EEPROM address 0x14)
 * and record in priv->gpio_altfunc which of the four pins are not configured
 * as GPIO.  Each pin is described by one nibble of the word.
 */
static int ftdi_gpio_init_ft232r(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	u16 cbus_config;
	u8 *buf;
	int ret;
	int i;

	buf = kmalloc(2, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = ftdi_read_eeprom(port->serial, buf, 0x14, 2);
	if (ret < 0)
		goto out_free;

	cbus_config = le16_to_cpup((__le16 *)buf);
	dev_dbg(&port->dev, "cbus_config = 0x%04x\n", cbus_config);

	priv->gc.ngpio = 4;

	priv->gpio_altfunc = 0xff;
	for (i = 0; i < priv->gc.ngpio; ++i) {
		if ((cbus_config & 0xf) == FTDI_FT232R_CBUS_MUX_GPIO)
			priv->gpio_altfunc &= ~BIT(i);
		cbus_config >>= 4;
	}
out_free:
	kfree(buf);

	return ret;
}

/*
 * Read the FT-X CBUS configuration (four bytes at EEPROM address 0x1a, one
 * byte per pin) and record in priv->gpio_altfunc which pins are not
 * configured as GPIO.
 */
static int ftdi_gpio_init_ftx(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_serial *serial = port->serial;
	const u16 cbus_cfg_addr = 0x1a;
	const u16 cbus_cfg_size = 4;
	u8 *cbus_cfg_buf;
	int result;
	u8 i;

	cbus_cfg_buf = kmalloc(cbus_cfg_size, GFP_KERNEL);
	if (!cbus_cfg_buf)
		return -ENOMEM;

	result = ftdi_read_eeprom(serial, cbus_cfg_buf,
				  cbus_cfg_addr, cbus_cfg_size);
	if (result < 0)
		goto out_free;

	/* FIXME: FT234XD alone has 1 GPIO, but how to recognize this IC?
*/
	priv->gc.ngpio = 4;

	/* Determine which pins are configured for CBUS bitbanging */
	priv->gpio_altfunc = 0xff;
	for (i = 0; i < priv->gc.ngpio; ++i) {
		if (cbus_cfg_buf[i] == FTDI_FTX_CBUS_MUX_GPIO)
			priv->gpio_altfunc &= ~BIT(i);
	}

out_free:
	kfree(cbus_cfg_buf);

	return result;
}

/*
 * Register a gpio_chip for the port's CBUS pins.
 *
 * Only FT232H, FT232R and FTX chips are handled; other chip types return 0
 * without registering anything.  The chip-specific helper reads the EEPROM
 * to learn which pins are usable.  gpio_registered is set only when
 * gpiochip_add_data() succeeds.
 */
static int ftdi_gpio_init(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);
	struct usb_serial *serial = port->serial;
	int result;

	switch (priv->chip_type) {
	case FT232H:
		result = ftdi_gpio_init_ft232h(port);
		break;
	case FT232R:
		result = ftdi_gpio_init_ft232r(port);
		break;
	case FTX:
		result = ftdi_gpio_init_ftx(port);
		break;
	default:
		return 0;
	}

	if (result < 0)
		return result;

	mutex_init(&priv->gpio_lock);

	priv->gc.label = "ftdi-cbus";
	priv->gc.request = ftdi_gpio_request;
	priv->gc.get_direction = ftdi_gpio_direction_get;
	priv->gc.direction_input = ftdi_gpio_direction_input;
	priv->gc.direction_output = ftdi_gpio_direction_output;
	priv->gc.init_valid_mask = ftdi_gpio_init_valid_mask;
	priv->gc.get = ftdi_gpio_get;
	priv->gc.set = ftdi_gpio_set;
	priv->gc.get_multiple = ftdi_gpio_get_multiple;
	priv->gc.set_multiple = ftdi_gpio_set_multiple;
	priv->gc.owner = THIS_MODULE;
	priv->gc.parent = &serial->interface->dev;
	priv->gc.base = -1;
	priv->gc.can_sleep = true;

	result = gpiochip_add_data(&priv->gc, port);
	if (!result)
		priv->gpio_registered = true;

	return result;
}

/*
 * Undo ftdi_gpio_init(): unregister the gpio_chip if it was registered and,
 * if any pin was ever requested, take the device out of CBUS mode.
 */
static void ftdi_gpio_remove(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	if (priv->gpio_registered) {
		gpiochip_remove(&priv->gc);
		priv->gpio_registered = false;
	}

	if (priv->gpio_used) {
		/* Exiting CBUS-mode does not reset pin states.
*/
		ftdi_exit_cbus_mode(port);
		priv->gpio_used = false;
	}
}

#else

/* Stubs used when the kernel is built without CONFIG_GPIOLIB. */
static int ftdi_gpio_init(struct usb_serial_port *port)
{
	return 0;
}

static void ftdi_gpio_remove(struct usb_serial_port *port) { }

#endif	/* CONFIG_GPIOLIB */

/*
 * ***************************************************************************
 * FTDI driver specific functions
 * ***************************************************************************
 */

/*
 * Interface-level probe.  If the matched id-table entry carries a quirk with
 * a probe hook, run it and abort on a non-zero result.  The quirk pointer is
 * then stored as the serial data so ftdi_port_probe() can find it.
 */
static int ftdi_probe(struct usb_serial *serial, const struct usb_device_id *id)
{
	const struct ftdi_quirk *quirk = (struct ftdi_quirk *)id->driver_info;

	if (quirk && quirk->probe) {
		int ret = quirk->probe(serial);
		if (ret != 0)
			return ret;
	}

	usb_set_serial_data(serial, (void *)id->driver_info);

	return 0;
}

/*
 * Per-port probe: allocate the private data, apply the quirk's port_probe
 * hook, detect the chip type, and set up packet size, latency timer and
 * GPIO.  Only allocation failure and an unknown chip type fail the probe; a
 * GPIO initialisation error is logged and otherwise ignored.
 */
static int ftdi_port_probe(struct usb_serial_port *port)
{
	const struct ftdi_quirk *quirk = usb_get_serial_data(port->serial);
	struct ftdi_private *priv;
	int result;

	priv = kzalloc(sizeof(struct ftdi_private), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	mutex_init(&priv->cfg_lock);

	if (quirk && quirk->port_probe)
		quirk->port_probe(priv);

	usb_set_serial_port_data(port, priv);

	result = ftdi_determine_type(port);
	if (result)
		goto err_free;

	ftdi_set_max_packet_size(port);
	/* fall back to a latency of 16 when the timer cannot be read */
	if (read_latency_timer(port) < 0)
		priv->latency = 16;
	write_latency_timer(port);

	result = ftdi_gpio_init(port);
	if (result < 0) {
		dev_err(&port->serial->interface->dev,
			"GPIO initialisation failed: %d\n",
			result);
	}

	return 0;

err_free:
	kfree(priv);

	return result;
}

/* Setup for the USB-UIRT device, which requires hardwired
 * baudrate (38400 gets mapped to 312500) */
/* Called from usbserial:serial_probe */
static void ftdi_USB_UIRT_setup(struct ftdi_private *priv)
{
	priv->flags |= ASYNC_SPD_CUST;
	priv->custom_divisor = 77;
	priv->force_baud = 38400;
}

/* Setup for the HE-TIRA1 device, which requires hardwired
 * baudrate (38400 gets mapped to 100000) and RTS-CTS enabled.
*/ static void ftdi_HE_TIRA1_setup(struct ftdi_private *priv) { priv->flags |= ASYNC_SPD_CUST; priv->custom_divisor = 240; priv->force_baud = 38400; priv->force_rtscts = 1; } /* * Module parameter to control latency timer for NDI FTDI-based USB devices. * If this value is not set in /etc/modprobe.d/ its value will be set * to 1ms. */ static int ndi_latency_timer = 1; /* Setup for the NDI FTDI-based USB devices, which requires hardwired * baudrate (19200 gets mapped to 1200000). * * Called from usbserial:serial_probe. */ static int ftdi_NDI_device_setup(struct usb_serial *serial) { struct usb_device *udev = serial->dev; int latency = ndi_latency_timer; if (latency == 0) latency = 1; if (latency > 99) latency = 99; dev_dbg(&udev->dev, "%s setting NDI device latency to %d\n", __func__, latency); dev_info(&udev->dev, "NDI device with a latency value of %d\n", latency); /* FIXME: errors are not returned */ usb_control_msg(udev, usb_sndctrlpipe(udev, 0), FTDI_SIO_SET_LATENCY_TIMER_REQUEST, FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, latency, 0, NULL, 0, WDR_TIMEOUT); return 0; } /* * First port on JTAG adaptors such as Olimex arm-usb-ocd or the FIC/OpenMoko * Neo1973 Debug Board is reserved for JTAG interface and can be accessed from * userspace using openocd. 
*/
static int ftdi_jtag_probe(struct usb_serial *serial)
{
	struct usb_interface *intf = serial->interface;
	int ifnum = intf->cur_altsetting->desc.bInterfaceNumber;

	/* refuse to bind to interface 0 */
	if (ifnum == 0) {
		dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n");
		return -ENODEV;
	}

	return 0;
}

/*
 * Probe hook that applies the JTAG rule of ftdi_jtag_probe() only to devices
 * whose manufacturer string is "CALAO Systems" or whose product string is one
 * of the names listed below.  All other devices are accepted unchanged.
 */
static int ftdi_8u2232c_probe(struct usb_serial *serial)
{
	struct usb_device *udev = serial->dev;

	if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems"))
		return ftdi_jtag_probe(serial);

	if (udev->product &&
		(!strcmp(udev->product, "Arrow USB Blaster") ||
		 !strcmp(udev->product, "BeagleBone/XDS100V2") ||
		 !strcmp(udev->product, "SNAP Connect E10")))
		return ftdi_jtag_probe(serial);

	return 0;
}

/*
 * First two ports on JTAG adaptors using an FT4232 such as STMicroelectronics's
 * ST Micro Connect Lite are reserved for JTAG or other non-UART interfaces and
 * can be accessed from userspace.
 * The next two ports are enabled as UARTs by default, where port 2 is
 * a conventional RS-232 UART.
 */
static int ftdi_stmclite_probe(struct usb_serial *serial)
{
	struct usb_interface *intf = serial->interface;
	int ifnum = intf->cur_altsetting->desc.bInterfaceNumber;

	if (ifnum < 2) {
		dev_info(&intf->dev, "Ignoring interface reserved for JTAG\n");
		return -ENODEV;
	}

	return 0;
}

/* Per-port teardown: remove the GPIO chip (if any) and free the private data. */
static void ftdi_port_remove(struct usb_serial_port *port)
{
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	ftdi_gpio_remove(port);

	kfree(priv);
}

/*
 * Open callback: reset the device, apply the current termios when a tty is
 * attached, then hand over to usb_serial_generic_open().
 */
static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port)
{
	struct usb_device *dev = port->serial->dev;
	struct ftdi_private *priv = usb_get_serial_port_data(port);

	/* No error checking for this (will get errors later anyway) */
	/* See ftdi_sio.h for description of what is reset */
	usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
			FTDI_SIO_RESET_REQUEST, FTDI_SIO_RESET_REQUEST_TYPE,
			FTDI_SIO_RESET_SIO,
			priv->channel, NULL, 0, WDR_TIMEOUT);

	/* Termios defaults are set by usb_serial_init. We don't change
	   port->tty->termios - this would lose speed settings, etc.
This is same behaviour as serial.c/rs_open() - Kuba */ /* ftdi_set_termios will send usb control messages */ if (tty) ftdi_set_termios(tty, port, NULL); return usb_serial_generic_open(tty, port); } static void ftdi_dtr_rts(struct usb_serial_port *port, int on) { struct ftdi_private *priv = usb_get_serial_port_data(port); /* Disable flow control */ if (!on) { if (usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(&port->dev, "error from flowcontrol urb\n"); } } /* drop RTS and DTR */ if (on) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); else clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } /* The SIO requires the first byte to have: * B0 1 * B1 0 * B2..7 length of message excluding byte 0 * * The new devices do not require this byte */ static int ftdi_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { struct ftdi_private *priv; int count; unsigned long flags; priv = usb_get_serial_port_data(port); if (priv->chip_type == SIO) { unsigned char *buffer = dest; int i, len, c; count = 0; spin_lock_irqsave(&port->lock, flags); for (i = 0; i < size - 1; i += priv->max_packet_size) { len = min_t(int, size - i, priv->max_packet_size) - 1; c = kfifo_out(&port->write_fifo, &buffer[i + 1], len); if (!c) break; port->icount.tx += c; buffer[i] = (c << 2) + 1; count += c + 1; } spin_unlock_irqrestore(&port->lock, flags); } else { count = kfifo_out_locked(&port->write_fifo, dest, size, &port->lock); port->icount.tx += count; } return count; } #define FTDI_RS_ERR_MASK (FTDI_RS_BI | FTDI_RS_PE | FTDI_RS_FE | FTDI_RS_OE) static int ftdi_process_packet(struct usb_serial_port *port, struct ftdi_private *priv, unsigned char *buf, int len) { unsigned char status; bool brkint = false; int i; char flag; if (len < 2) { dev_dbg(&port->dev, "malformed packet\n"); return 0; } /* Compare new line status to the old one, signal if 
different/ N.B. packet may be processed more than once, but differences are only processed once. */ status = buf[0] & FTDI_STATUS_B0_MASK; if (status != priv->prev_status) { char diff_status = status ^ priv->prev_status; if (diff_status & FTDI_RS0_CTS) port->icount.cts++; if (diff_status & FTDI_RS0_DSR) port->icount.dsr++; if (diff_status & FTDI_RS0_RI) port->icount.rng++; if (diff_status & FTDI_RS0_RLSD) { struct tty_struct *tty; port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) usb_serial_handle_dcd_change(port, tty, status & FTDI_RS0_RLSD); tty_kref_put(tty); } wake_up_interruptible(&port->port.delta_msr_wait); priv->prev_status = status; } /* save if the transmitter is empty or not */ if (buf[1] & FTDI_RS_TEMT) priv->transmit_empty = 1; else priv->transmit_empty = 0; if (len == 2) return 0; /* status only */ /* * Break and error status must only be processed for packets with * data payload to avoid over-reporting. */ flag = TTY_NORMAL; if (buf[1] & FTDI_RS_ERR_MASK) { /* * Break takes precedence over parity, which takes precedence * over framing errors. Note that break is only associated * with the last character in the buffer and only when it's a * NUL. 
*/ if (buf[1] & FTDI_RS_BI && buf[len - 1] == '\0') { port->icount.brk++; brkint = true; } if (buf[1] & FTDI_RS_PE) { flag = TTY_PARITY; port->icount.parity++; } else if (buf[1] & FTDI_RS_FE) { flag = TTY_FRAME; port->icount.frame++; } /* Overrun is special, not associated with a char */ if (buf[1] & FTDI_RS_OE) { port->icount.overrun++; tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } port->icount.rx += len - 2; if (brkint || port->sysrq) { for (i = 2; i < len; i++) { if (brkint && i == len - 1) { if (usb_serial_handle_break(port)) return len - 3; flag = TTY_BREAK; } if (usb_serial_handle_sysrq_char(port, buf[i])) continue; tty_insert_flip_char(&port->port, buf[i], flag); } } else { tty_insert_flip_string_fixed_flag(&port->port, buf + 2, flag, len - 2); } return len - 2; } static void ftdi_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct ftdi_private *priv = usb_get_serial_port_data(port); char *data = urb->transfer_buffer; int i; int len; int count = 0; for (i = 0; i < urb->actual_length; i += priv->max_packet_size) { len = min_t(int, urb->actual_length - i, priv->max_packet_size); count += ftdi_process_packet(port, priv, &data[i], len); } if (count) tty_flip_buffer_push(&port->port); } static int ftdi_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); u16 value; int ret; /* break_state = -1 to turn on break, and 0 to turn off break */ /* see drivers/char/tty_io.c to see it used */ /* last_set_data_value NEVER has the break bit set in it */ if (break_state) value = priv->last_set_data_value | FTDI_SIO_SET_BREAK; else value = priv->last_set_data_value; ret = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_TIMEOUT); if (ret < 0) { dev_err(&port->dev, "%s FAILED to enable/disable break state 
(state was %d)\n", __func__, break_state); return ret; } dev_dbg(&port->dev, "%s break state is %d - urb is %d\n", __func__, break_state, value); return 0; } static bool ftdi_tx_empty(struct usb_serial_port *port) { unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret == 2) { if (!(buf[1] & FTDI_RS_TEMT)) return false; } return true; } /* old_termios contains the original termios settings and tty->termios contains * the new setting to be used * WARNING: set_termios calls this with old_termios in kernel space */ static void ftdi_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_device *dev = port->serial->dev; struct device *ddev = &port->dev; struct ftdi_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = &tty->termios; unsigned int cflag; u16 value, index; int ret; /* Force baud rate if this device requires it, unless it is set to B0. */ if (priv->force_baud && ((termios->c_cflag & CBAUD) != B0)) { dev_dbg(ddev, "%s: forcing baud rate for this device\n", __func__); tty_encode_baud_rate(tty, priv->force_baud, priv->force_baud); } /* Force RTS-CTS if this device requires it. */ if (priv->force_rtscts) { dev_dbg(ddev, "%s: forcing rtscts for this device\n", __func__); termios->c_cflag |= CRTSCTS; } /* * All FTDI UART chips are limited to CS7/8. We shouldn't pretend to * support CS5/6 and revert the CSIZE setting instead. * * CS5 however is used to control some smartcard readers which abuse * this limitation to switch modes. Original FTDI chips fall back to * eight data bits. * * TODO: Implement a quirk to only allow this with mentioned * readers. One I know of (Argolis Smartreader V1) * returns "USB smartcard server" as iInterface string. * The vendor didn't bother with a custom VID/PID of * course. 
*/ if (C_CSIZE(tty) == CS6) { dev_warn(ddev, "requested CSIZE setting not supported\n"); termios->c_cflag &= ~CSIZE; if (old_termios) termios->c_cflag |= old_termios->c_cflag & CSIZE; else termios->c_cflag |= CS8; } cflag = termios->c_cflag; if (!old_termios) goto no_skip; if (old_termios->c_cflag == termios->c_cflag && old_termios->c_ispeed == termios->c_ispeed && old_termios->c_ospeed == termios->c_ospeed) goto no_c_cflag_changes; /* NOTE These routines can get interrupted by ftdi_sio_read_bulk_callback - need to examine what this means - don't see any problems yet */ if ((old_termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB)) == (termios->c_cflag & (CSIZE|PARODD|PARENB|CMSPAR|CSTOPB))) goto no_data_parity_stop_changes; no_skip: /* Set number of data bits, parity, stop bits */ value = 0; value |= (cflag & CSTOPB ? FTDI_SIO_SET_DATA_STOP_BITS_2 : FTDI_SIO_SET_DATA_STOP_BITS_1); if (cflag & PARENB) { if (cflag & CMSPAR) value |= cflag & PARODD ? FTDI_SIO_SET_DATA_PARITY_MARK : FTDI_SIO_SET_DATA_PARITY_SPACE; else value |= cflag & PARODD ? 
FTDI_SIO_SET_DATA_PARITY_ODD : FTDI_SIO_SET_DATA_PARITY_EVEN; } else { value |= FTDI_SIO_SET_DATA_PARITY_NONE; } switch (cflag & CSIZE) { case CS5: dev_dbg(ddev, "Setting CS5 quirk\n"); break; case CS7: value |= 7; dev_dbg(ddev, "Setting CS7\n"); break; default: case CS8: value |= 8; dev_dbg(ddev, "Setting CS8\n"); break; } /* This is needed by the break command since it uses the same command - but is or'ed with this value */ priv->last_set_data_value = value; if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_DATA_REQUEST, FTDI_SIO_SET_DATA_REQUEST_TYPE, value, priv->channel, NULL, 0, WDR_SHORT_TIMEOUT) < 0) { dev_err(ddev, "%s FAILED to set databits/stopbits/parity\n", __func__); } /* Now do the baudrate */ no_data_parity_stop_changes: if ((cflag & CBAUD) == B0) { /* Disable flow control */ if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0), FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, 0, priv->channel, NULL, 0, WDR_TIMEOUT) < 0) { dev_err(ddev, "%s error from disable flowcontrol urb\n", __func__); } /* Drop RTS and DTR */ clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); } else { /* set the baudrate determined before */ mutex_lock(&priv->cfg_lock); if (change_speed(tty, port)) dev_err(ddev, "%s urb failed to set baudrate\n", __func__); mutex_unlock(&priv->cfg_lock); /* Ensure RTS and DTR are raised when baudrate changed from 0 */ if (old_termios && (old_termios->c_cflag & CBAUD) == B0) set_mctrl(port, TIOCM_DTR | TIOCM_RTS); } no_c_cflag_changes: /* Set hardware-assisted flow control */ value = 0; if (C_CRTSCTS(tty)) { dev_dbg(&port->dev, "enabling rts/cts flow control\n"); index = FTDI_SIO_RTS_CTS_HS; } else if (I_IXON(tty)) { dev_dbg(&port->dev, "enabling xon/xoff flow control\n"); index = FTDI_SIO_XON_XOFF_HS; value = STOP_CHAR(tty) << 8 | START_CHAR(tty); } else { dev_dbg(&port->dev, "disabling flow control\n"); index = FTDI_SIO_DISABLE_FLOW_CTRL; } index |= priv->channel; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 
FTDI_SIO_SET_FLOW_CTRL_REQUEST, FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, value, index, NULL, 0, WDR_TIMEOUT); if (ret < 0) dev_err(&port->dev, "failed to set flow control: %d\n", ret); } /* * Get modem-control status. * * Returns the number of status bytes retrieved (device dependant), or * negative error code. */ static int ftdi_get_modem_status(struct usb_serial_port *port, unsigned char status[2]) { struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char *buf; int len; int ret; buf = kmalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The device returns a two byte value (the SIO a 1 byte value) in the * same format as the data returned from the IN endpoint. */ if (priv->chip_type == SIO) len = 1; else len = 2; ret = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), FTDI_SIO_GET_MODEM_STATUS_REQUEST, FTDI_SIO_GET_MODEM_STATUS_REQUEST_TYPE, 0, priv->channel, buf, len, WDR_TIMEOUT); /* NOTE: We allow short responses and handle that below. */ if (ret < 1) { dev_err(&port->dev, "failed to get modem status: %d\n", ret); if (ret >= 0) ret = -EIO; ret = usb_translate_errors(ret); goto out; } status[0] = buf[0]; if (ret > 1) status[1] = buf[1]; else status[1] = 0; dev_dbg(&port->dev, "%s - 0x%02x%02x\n", __func__, status[0], status[1]); out: kfree(buf); return ret; } static int ftdi_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ftdi_private *priv = usb_get_serial_port_data(port); unsigned char buf[2]; int ret; ret = ftdi_get_modem_status(port, buf); if (ret < 0) return ret; ret = (buf[0] & FTDI_SIO_DSR_MASK ? TIOCM_DSR : 0) | (buf[0] & FTDI_SIO_CTS_MASK ? TIOCM_CTS : 0) | (buf[0] & FTDI_SIO_RI_MASK ? TIOCM_RI : 0) | (buf[0] & FTDI_SIO_RLSD_MASK ? 
TIOCM_CD : 0) | priv->last_dtr_rts; return ret; } static int ftdi_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return update_mctrl(port, set, clear); } static int ftdi_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; void __user *argp = (void __user *)arg; switch (cmd) { case TIOCSERGETLSR: return get_lsr_info(port, argp); default: break; } return -ENOIOCTLCMD; } static struct usb_serial_driver ftdi_device = { .driver = { .name = "ftdi_sio", .dev_groups = ftdi_groups, }, .description = "FTDI USB Serial Device", .id_table = id_table_combined, .num_ports = 1, .bulk_in_size = 512, .bulk_out_size = 256, .probe = ftdi_probe, .port_probe = ftdi_port_probe, .port_remove = ftdi_port_remove, .open = ftdi_open, .dtr_rts = ftdi_dtr_rts, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .process_read_urb = ftdi_process_read_urb, .prepare_write_buffer = ftdi_prepare_write_buffer, .tiocmget = ftdi_tiocmget, .tiocmset = ftdi_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .ioctl = ftdi_ioctl, .get_serial = get_serial_info, .set_serial = set_serial_info, .set_termios = ftdi_set_termios, .break_ctl = ftdi_break_ctl, .tx_empty = ftdi_tx_empty, }; static struct usb_serial_driver * const serial_drivers[] = { &ftdi_device, NULL }; module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(ndi_latency_timer, int, 0644); MODULE_PARM_DESC(ndi_latency_timer, "NDI device latency timer override");
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef CEPH_MSGR_H #define CEPH_MSGR_H /* * Data types for message passing layer used by Ceph. */ #define CEPH_MON_PORT 6789 /* default monitor port */ /* * tcp connection banner. include a protocol version. and adjust * whenever the wire protocol changes. try to keep this string length * constant. */ #define CEPH_BANNER "ceph v027" #define CEPH_BANNER_LEN 9 #define CEPH_BANNER_MAX_LEN 30 /* * messenger V2 connection banner prefix. * The full banner string should have the form: "ceph v2\n<le16>" * the 2 bytes are the length of the remaining banner. 
*/ #define CEPH_BANNER_V2 "ceph v2\n" #define CEPH_BANNER_V2_LEN 8 #define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16)) /* * messenger V2 features */ #define CEPH_MSGR2_INCARNATION_1 (0ull) #define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); #define HAVE_MSGR2_FEATURE(x, name) \ (((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name)) DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1 #define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) #define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1) /* * Rollover-safe type and comparator for 32-bit sequence numbers. * Comparator returns -1, 0, or 1. */ typedef __u32 ceph_seq_t; static inline __s32 ceph_seq_cmp(__u32 a, __u32 b) { return (__s32)a - (__s32)b; } /* * entity_name -- logical name for a process participating in the * network, e.g. 'mds0' or 'osd3'. */ struct ceph_entity_name { __u8 type; /* CEPH_ENTITY_TYPE_* */ __le64 num; } __attribute__ ((packed)); #define CEPH_ENTITY_TYPE_MON 0x01 #define CEPH_ENTITY_TYPE_MDS 0x02 #define CEPH_ENTITY_TYPE_OSD 0x04 #define CEPH_ENTITY_TYPE_CLIENT 0x08 #define CEPH_ENTITY_TYPE_AUTH 0x20 #define CEPH_ENTITY_TYPE_ANY 0xFF extern const char *ceph_entity_type_name(int type); /* * entity_addr -- network address */ struct ceph_entity_addr { __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */ __le32 nonce; /* unique id for process (e.g. 
pid) */ struct sockaddr_storage in_addr; } __attribute__ ((packed)); static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs, const struct ceph_entity_addr *rhs) { return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) && lhs->nonce == rhs->nonce; } struct ceph_entity_inst { struct ceph_entity_name name; struct ceph_entity_addr addr; } __attribute__ ((packed)); /* used by message exchange protocol */ #define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ #define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ #define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing incoming connection */ #define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again with higher cseq */ #define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again with higher gseq */ #define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ #define CEPH_MSGR_TAG_MSG 7 /* message */ #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! 
*/
#define CEPH_MSGR_TAG_BADPROTOVER   10 /* bad protocol version */
#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
#define CEPH_MSGR_TAG_FEATURES      12 /* insufficient features */
#define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
#define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16  /* cephx v2 doing server challenge */

/*
 * connection negotiation
 *
 * All structures below are packed and use explicit little-endian field
 * types, so the field order and sizes must not be changed.
 */
struct ceph_msg_connect {
	__le64 features;     /* supported feature bits */
	__le32 host_type;    /* CEPH_ENTITY_TYPE_* */
	__le32 global_seq;   /* count connections initiated by this host */
	__le32 connect_seq;  /* count connections initiated in this session */
	__le32 protocol_version;
	__le32 authorizer_protocol;
	__le32 authorizer_len;
	__u8  flags;         /* CEPH_MSG_CONNECT_* */
} __attribute__ ((packed));

/* Reply to ceph_msg_connect; 'tag' is presumably one of CEPH_MSGR_TAG_*. */
struct ceph_msg_connect_reply {
	__u8 tag;
	__le64 features;     /* feature bits for this session */
	__le32 global_seq;
	__le32 connect_seq;
	__le32 protocol_version;
	__le32 authorizer_len;
	__u8 flags;
} __attribute__ ((packed));

#define CEPH_MSG_CONNECT_LOSSY  1  /* messages i send may be safely dropped */


/*
 * message header
 */
struct ceph_msg_header_old {
	__le64 seq;       /* message seq# for this session */
	__le64 tid;       /* transaction id */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */
	__le16 version;   /* version of message encoding */

	__le32 front_len; /* bytes in main payload */
	__le32 middle_len;/* bytes in middle payload */
	__le32 data_len;  /* bytes of data payload */
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */

	struct ceph_entity_inst src, orig_src;
	__le32 reserved;
	__le32 crc;       /* header crc32c */
} __attribute__ ((packed));

/*
 * Current header layout: compared with ceph_msg_header_old it carries only
 * the source entity name and adds a compat_version field.
 */
struct ceph_msg_header {
	__le64 seq;       /* message seq# for this session */
	__le64 tid;       /* transaction id */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */
	__le16 version;   /* version of message encoding */

	__le32 front_len; /* bytes in main payload */
	__le32 middle_len;/* bytes in middle payload */
	__le32 data_len;  /* bytes of data payload */
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */

	struct ceph_entity_name src;
	__le16 compat_version;
	__le16 reserved;
	__le32 crc;       /* header crc32c */
} __attribute__ ((packed));

/*
 * Second header layout: has no payload-length, source or crc fields and
 * adds data_pre_padding_len, ack_seq and flags.
 */
struct ceph_msg_header2 {
	__le64 seq;       /* message seq# for this session */
	__le64 tid;       /* transaction id */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */
	__le16 version;   /* version of message encoding */

	__le32 data_pre_padding_len;
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */
	__le64 ack_seq;
	__u8 flags;
	/* oldest code we think can decode this.  unknown if zero. */
	__le16 compat_version;
	__le16 reserved;
} __attribute__ ((packed));

#define CEPH_MSG_PRIO_LOW     64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH    196
#define CEPH_MSG_PRIO_HIGHEST 255

/*
 * follows data payload
 */
struct ceph_msg_footer_old {
	__le32 front_crc, middle_crc, data_crc;
	__u8 flags;     /* presumably CEPH_MSG_FOOTER_* bits */
} __attribute__ ((packed));

struct ceph_msg_footer {
	__le32 front_crc, middle_crc, data_crc;
	// sig holds the 64 bits of the digital signature for the message PLR
	__le64  sig;
	__u8 flags;     /* presumably CEPH_MSG_FOOTER_* bits */
} __attribute__ ((packed));

#define CEPH_MSG_FOOTER_COMPLETE  (1<<0)   /* msg wasn't aborted */
#define CEPH_MSG_FOOTER_NOCRC     (1<<1)   /* no data crc */
#define CEPH_MSG_FOOTER_SIGNED	  (1<<2)   /* msg was signed */


#endif
5 1 4 4 4 4 4 5 5 1 4 1 1 4 4 4 4 1 2 2 2 11 11 11 11 11 11 11 11 11 11 11 3 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
// SPDX-License-Identifier: GPL-2.0-or-later

#include <crypto/hash.h>
#include <linux/cpu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>
#include <net/tcp.h>

/* Size of every per-CPU scratch buffer; changed under cpool_mutex. */
static size_t __scratch_size;

/* Per-CPU scratch buffer and the local lock taken around its use. */
struct sigpool_scratch {
	local_lock_t bh_lock;
	void __rcu *pad;
};

static DEFINE_PER_CPU(struct sigpool_scratch, sigpool_scratch) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/* One pool slot; a slot whose alg is NULL is unused. */
struct sigpool_entry {
	struct crypto_ahash	*hash;
	const char		*alg;
	struct kref		kref;
	uint16_t		needs_key:1,
				reserved:15;
};

#define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry))
static struct sigpool_entry cpool[CPOOL_SIZE];
/* Number of leading cpool[] slots that have ever been populated. */
static unsigned int cpool_populated;
static DEFINE_MUTEX(cpool_mutex);

/* Slow-path */
/* Batch of replaced scratch buffers, freed from an RCU callback. */
struct scratches_to_free {
	struct rcu_head rcu;
	unsigned int cnt;
	void *scratches[];
};

/* RCU callback: free every queued old scratch buffer, then the batch. */
static void free_old_scratches(struct rcu_head *head)
{
	struct scratches_to_free *stf;

	stf = container_of(head, struct scratches_to_free, rcu);
	while (stf->cnt--)
		kfree(stf->scratches[stf->cnt]);
	kfree(stf);
}

/**
 * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path
 * @size: request size for the scratch/temp buffer
 *
 * Must be called with cpool_mutex held. Does nothing when the current
 * scratch size already covers @size. Otherwise a new buffer is installed
 * for each possible CPU; replaced buffers of online CPUs are freed from an
 * RCU callback, other replaced buffers are freed immediately.
 *
 * Returns 0 on success or -ENOMEM. On a mid-loop allocation failure the
 * CPUs already visited keep their new buffer and __scratch_size is left
 * unchanged.
 */
static int sigpool_reserve_scratch(size_t size)
{
	struct scratches_to_free *stf;
	size_t stf_sz = struct_size(stf, scratches, num_possible_cpus());
	int cpu, err = 0;

	lockdep_assert_held(&cpool_mutex);
	if (__scratch_size >= size)
		return 0;

	stf = kmalloc(stf_sz, GFP_KERNEL);
	if (!stf)
		return -ENOMEM;
	stf->cnt = 0;

	size = max(size, __scratch_size);
	cpus_read_lock();
	for_each_possible_cpu(cpu) {
		void *scratch, *old_scratch;

		scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
		if (!scratch) {
			err = -ENOMEM;
			break;
		}

		old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu),
					scratch, lockdep_is_held(&cpool_mutex));
		/* Offline CPU or no previous buffer: free right away. */
		if (!cpu_online(cpu) || !old_scratch) {
			kfree(old_scratch);
			continue;
		}
		stf->scratches[stf->cnt++] = old_scratch;
	}
	cpus_read_unlock();
	if (!err)
		__scratch_size = size;

	/* Queued even when empty or on error, so that stf itself is freed. */
	call_rcu(&stf->rcu, free_old_scratches);
	return err;
}

/* Free the scratch buffer of every possible CPU and reset the size to 0. */
static void sigpool_scratch_free(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		kfree(rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu),
					  NULL, lockdep_is_held(&cpool_mutex)));
	__scratch_size = 0;
}

/* Check that @hash can be cloned: clone it once and free the clone. */
static int __cpool_try_clone(struct crypto_ahash *hash)
{
	struct crypto_ahash *tmp;

	tmp = crypto_clone_ahash(hash);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	crypto_free_ahash(tmp);
	return 0;
}

/*
 * Populate pool entry @e for algorithm @alg: duplicate the name, allocate
 * the transform, verify that it can be cloned and initialise the refcount.
 * Returns 0 on success or a negative error code, in which case e->alg is
 * reset to NULL.
 */
static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg)
{
	struct crypto_ahash *cpu0_hash;
	int ret;

	e->alg = kstrdup(alg, GFP_KERNEL);
	if (!e->alg)
		return -ENOMEM;

	cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(cpu0_hash)) {
		ret = PTR_ERR(cpu0_hash);
		goto out_free_alg;
	}

	e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY;

	ret = __cpool_try_clone(cpu0_hash);
	if (ret)
		goto out_free_cpu0_hash;
	e->hash = cpu0_hash;
	kref_init(&e->kref);
	return 0;

out_free_cpu0_hash:
	crypto_free_ahash(cpu0_hash);
out_free_alg:
	kfree(e->alg);
	e->alg = NULL;
	return ret;
}

/**
 * tcp_sigpool_alloc_ahash - allocates pool for ahash requests
 * @alg: name of async hash algorithm
 * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size
 *
 * Returns the pool id (index into cpool[]) on success, taking a reference
 * on an existing entry for @alg when there is one, or a negative error
 * code (-ENOSPC when the pool is full).
 */
int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size)
{
	int i, ret;

	/* slow-path */
	mutex_lock(&cpool_mutex);
	ret = sigpool_reserve_scratch(scratch_size);
	if (ret)
		goto out;
	/* First pass: reuse an existing entry for the same algorithm. */
	for (i = 0; i < cpool_populated; i++) {
		if (!cpool[i].alg)
			continue;
		if (strcmp(cpool[i].alg, alg))
			continue;

		/* pairs with tcp_sigpool_release() */
		if (!kref_get_unless_zero(&cpool[i].kref))
			kref_init(&cpool[i].kref);
		ret = i;
		goto out;
	}

	/* Second pass: find the first unused slot, else append at the end. */
	for (i = 0; i < cpool_populated; i++) {
		if (!cpool[i].alg)
			break;
	}
	if (i >= CPOOL_SIZE) {
		ret = -ENOSPC;
		goto out;
	}

	ret = __cpool_alloc_ahash(&cpool[i], alg);
	if (!ret) {
		ret = i;
		if (i == cpool_populated)
			cpool_populated++;
	}
out:
	mutex_unlock(&cpool_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash);

/* Release the transform and name of @e and zero the slot for reuse. */
static void __cpool_free_entry(struct sigpool_entry *e)
{
	crypto_free_ahash(e->hash);
	kfree(e->alg);
	memset(e, 0, sizeof(*e));
}

/*
 * Deferred cleanup: free every populated entry whose refcount reads zero
 * and, if no entry is still referenced, free the scratch buffers as well.
 */
static void cpool_cleanup_work_cb(struct work_struct *work)
{
	bool free_scratch = true;
	unsigned int i;

	mutex_lock(&cpool_mutex);
	for (i = 0; i < cpool_populated; i++) {
		if (kref_read(&cpool[i].kref) > 0) {
			free_scratch = false;
			continue;
		}
		if (!cpool[i].alg)
			continue;
		__cpool_free_entry(&cpool[i]);
	}
	if (free_scratch)
		sigpool_scratch_free();
	mutex_unlock(&cpool_mutex);
}

static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb);
/* kref release callback: only schedules the cleanup work item. */
static void cpool_schedule_cleanup(struct kref *kref)
{
	schedule_work(&cpool_cleanup_work);
}

/**
 * tcp_sigpool_release - decreases number of users for a pool. If it was
 * the last user of the pool, releases any memory that was consumed.
 * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
 *
 * The memory is released from a scheduled work item, not from this call.
 */
void tcp_sigpool_release(unsigned int id)
{
	if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
		return;

	/* slow-path */
	kref_put(&cpool[id].kref, cpool_schedule_cleanup);
}
EXPORT_SYMBOL_GPL(tcp_sigpool_release);

/**
 * tcp_sigpool_get - increases number of users (refcounter) for a pool
 * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
 */
void tcp_sigpool_get(unsigned int id)
{
	if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
		return;
	kref_get(&cpool[id].kref);
}
EXPORT_SYMBOL_GPL(tcp_sigpool_get);

/*
 * Start using pool @id: clone the pool's transform, allocate a request for
 * it in @c and hand out this CPU's scratch buffer.
 *
 * On success (return 0) the RCU-bh read lock and the per-CPU scratch lock
 * are held until tcp_sigpool_end(). On failure a negative error code is
 * returned and the RCU-bh read lock has already been dropped.
 */
int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH)
{
	struct crypto_ahash *hash;

	rcu_read_lock_bh();
	if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) {
		rcu_read_unlock_bh();
		return -EINVAL;
	}

	hash = crypto_clone_ahash(cpool[id].hash);
	if (IS_ERR(hash)) {
		rcu_read_unlock_bh();
		return PTR_ERR(hash);
	}

	c->req = ahash_request_alloc(hash, GFP_ATOMIC);
	if (!c->req) {
		crypto_free_ahash(hash);
		rcu_read_unlock_bh();
		return -ENOMEM;
	}
	ahash_request_set_callback(c->req, 0, NULL, NULL);

	/* Pairs with sigpool_reserve_scratch(), scratch area is
	 * valid (allocated) until tcp_sigpool_end().
	 */
	local_lock_nested_bh(&sigpool_scratch.bh_lock);
	c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch.pad));
	return 0;
}
EXPORT_SYMBOL_GPL(tcp_sigpool_start);

/*
 * Counterpart of a successful tcp_sigpool_start(): drop the scratch lock
 * and the RCU-bh read lock, then free the request and its cloned transform.
 */
void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH)
{
	struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req);

	local_unlock_nested_bh(&sigpool_scratch.bh_lock);
	rcu_read_unlock_bh();
	ahash_request_free(c->req);
	crypto_free_ahash(hash);
}
EXPORT_SYMBOL_GPL(tcp_sigpool_end);

/**
 * tcp_sigpool_algo - return algorithm of tcp_sigpool
 * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
 * @buf: buffer to return name of algorithm
 * @buf_len: size of @buf
 *
 * Returns the result of strscpy() into @buf.
 * NOTE(review): the return type is size_t, so the -EINVAL returned for an
 * invalid @id (and any negative strscpy() result) reaches the caller as a
 * large unsigned value - confirm that callers account for this.
 */
size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len)
{
	if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg))
		return -EINVAL;

	return strscpy(buf, cpool[id].alg, buf_len);
}
EXPORT_SYMBOL_GPL(tcp_sigpool_algo);

/**
 * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool
 * @hp: tcp_sigpool pointer
 * @skb: buffer to add sign for
 * @header_len: TCP header length for this segment
 *
 * Feeds the linear data after the header, then every page fragment, then
 * (recursively, with a header length of 0) every skb on the frag list into
 * the hash request. Returns 0 on success and 1 if any hash update fails.
 */
int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
			      const struct sk_buff *skb,
			      unsigned int header_len)
{
	/* Linear bytes beyond the header; 0 if the header fills the head. */
	const unsigned int head_data_len = skb_headlen(skb) > header_len ?
					   skb_headlen(skb) - header_len : 0;
	const struct skb_shared_info *shi = skb_shinfo(skb);
	const struct tcphdr *tp = tcp_hdr(skb);
	struct ahash_request *req = hp->req;
	struct sk_buff *frag_iter;
	struct scatterlist sg;
	unsigned int i;

	sg_init_table(&sg, 1);

	sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len);
	ahash_request_set_crypt(req, &sg, NULL, head_data_len);
	if (crypto_ahash_update(req))
		return 1;

	for (i = 0; i < shi->nr_frags; ++i) {
		const skb_frag_t *f = &shi->frags[i];
		unsigned int offset = skb_frag_off(f);
		struct page *page;

		/* Step to the page that actually contains the offset. */
		page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
		sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset));
		ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
		if (crypto_ahash_update(req))
			return 1;
	}

	skb_walk_frags(skb, frag_iter)
		if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0))
			return 1;

	return 0;
}
EXPORT_SYMBOL(tcp_sigpool_hash_skb_data);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Per-CPU pool of crypto requests");
1 2 2 1 1 1 3 3 3 3 3 3 10 2 10 10 8 1 9 8 13 10 1 13 13 2 11 1 9 1 11 11 2 2 2 6 6 6 563 548 21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2007-2012 Siemens AG * * Written by: * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Sergey Lapin <slapin@ossfans.org> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #include <linux/netdevice.h> #include <linux/module.h> #include <linux/if_arp.h> #include <linux/ieee802154.h> #include <net/nl802154.h> #include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include <net/cfg802154.h> #include "ieee802154_i.h" #include "driver-ops.h" int mac802154_wpan_update_llsec(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; int rc = 0; if (ops->llsec) { struct ieee802154_llsec_params params; int changed = 0; params.pan_id = wpan_dev->pan_id; changed |= IEEE802154_LLSEC_PARAM_PAN_ID; params.hwaddr = wpan_dev->extended_addr; changed |= 
IEEE802154_LLSEC_PARAM_HWADDR; rc = ops->llsec->set_params(dev, &params, changed); } return rc; } static int mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct sockaddr_ieee802154 *sa = (struct sockaddr_ieee802154 *)&ifr->ifr_addr; int err = -ENOIOCTLCMD; if (cmd != SIOCGIFADDR && cmd != SIOCSIFADDR) return err; rtnl_lock(); switch (cmd) { case SIOCGIFADDR: { u16 pan_id, short_addr; pan_id = le16_to_cpu(wpan_dev->pan_id); short_addr = le16_to_cpu(wpan_dev->short_addr); if (pan_id == IEEE802154_PANID_BROADCAST || short_addr == IEEE802154_ADDR_BROADCAST) { err = -EADDRNOTAVAIL; break; } sa->family = AF_IEEE802154; sa->addr.addr_type = IEEE802154_ADDR_SHORT; sa->addr.pan_id = pan_id; sa->addr.short_addr = short_addr; err = 0; break; } case SIOCSIFADDR: if (netif_running(dev)) { rtnl_unlock(); return -EBUSY; } dev_warn(&dev->dev, "Using DEBUGing ioctl SIOCSIFADDR isn't recommended!\n"); if (sa->family != AF_IEEE802154 || sa->addr.addr_type != IEEE802154_ADDR_SHORT || sa->addr.pan_id == IEEE802154_PANID_BROADCAST || sa->addr.short_addr == IEEE802154_ADDR_BROADCAST || sa->addr.short_addr == IEEE802154_ADDR_UNDEF) { err = -EINVAL; break; } wpan_dev->pan_id = cpu_to_le16(sa->addr.pan_id); wpan_dev->short_addr = cpu_to_le16(sa->addr.short_addr); err = mac802154_wpan_update_llsec(dev); break; } rtnl_unlock(); return err; } static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct sockaddr *addr = p; __le64 extended_addr; if (netif_running(dev)) return -EBUSY; /* lowpan need to be down for update * SLAAC address after ifup */ if (sdata->wpan_dev.lowpan_dev) { if (netif_running(sdata->wpan_dev.lowpan_dev)) return -EBUSY; } ieee802154_be64_to_le64(&extended_addr, addr->sa_data); if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) 
return -EINVAL; dev_addr_set(dev, addr->sa_data); sdata->wpan_dev.extended_addr = extended_addr; /* update lowpan interface mac address when * wpan mac has been changed */ if (sdata->wpan_dev.lowpan_dev) dev_addr_set(sdata->wpan_dev.lowpan_dev, dev->dev_addr); return mac802154_wpan_update_llsec(dev); } static int ieee802154_setup_hw(struct ieee802154_sub_if_data *sdata) { struct ieee802154_local *local = sdata->local; struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; sdata->required_filtering = sdata->iface_default_filtering; if (local->hw.flags & IEEE802154_HW_AFILT) { local->addr_filt.pan_id = wpan_dev->pan_id; local->addr_filt.ieee_addr = wpan_dev->extended_addr; local->addr_filt.short_addr = wpan_dev->short_addr; } if (local->hw.flags & IEEE802154_HW_LBT) { ret = drv_set_lbt_mode(local, wpan_dev->lbt); if (ret < 0) return ret; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { ret = drv_set_csma_params(local, wpan_dev->min_be, wpan_dev->max_be, wpan_dev->csma_retries); if (ret < 0) return ret; } if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { ret = drv_set_max_frame_retries(local, wpan_dev->frame_retries); if (ret < 0) return ret; } return 0; } static int mac802154_slave_open(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_local *local = sdata->local; int res; ASSERT_RTNL(); set_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) { res = ieee802154_setup_hw(sdata); if (res) goto err; res = drv_start(local, sdata->required_filtering, &local->addr_filt); if (res) goto err; } local->open_count++; netif_start_queue(dev); return 0; err: /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; } static int ieee802154_check_mac_settings(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, struct ieee802154_sub_if_data *nsdata) { struct wpan_dev *nwpan_dev = &nsdata->wpan_dev; struct wpan_dev *wpan_dev = 
&sdata->wpan_dev; ASSERT_RTNL(); if (sdata->iface_default_filtering != nsdata->iface_default_filtering) return -EBUSY; if (local->hw.flags & IEEE802154_HW_AFILT) { if (wpan_dev->pan_id != nwpan_dev->pan_id || wpan_dev->short_addr != nwpan_dev->short_addr || wpan_dev->extended_addr != nwpan_dev->extended_addr) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_CSMA_PARAMS) { if (wpan_dev->min_be != nwpan_dev->min_be || wpan_dev->max_be != nwpan_dev->max_be || wpan_dev->csma_retries != nwpan_dev->csma_retries) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_FRAME_RETRIES) { if (wpan_dev->frame_retries != nwpan_dev->frame_retries) return -EBUSY; } if (local->hw.flags & IEEE802154_HW_LBT) { if (wpan_dev->lbt != nwpan_dev->lbt) return -EBUSY; } return 0; } static int ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype iftype) { struct ieee802154_local *local = sdata->local; struct ieee802154_sub_if_data *nsdata; /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { int ret; /* TODO currently we don't support multiple node/coord * types we need to run skb_clone at rx path. Check if * there exist really an use case if we need to support * multiple node/coord types at the same time. */ if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR && nsdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR) return -EBUSY; /* check all phy mac sublayer settings are the same. * We have only one phy, different values makes trouble. 
*/ ret = ieee802154_check_mac_settings(local, sdata, nsdata); if (ret < 0) return ret; } } return 0; } static int mac802154_wpan_open(struct net_device *dev) { int rc; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; rc = ieee802154_check_concurrent_iface(sdata, wpan_dev->iftype); if (rc < 0) return rc; return mac802154_slave_open(dev); } static int mac802154_slave_close(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct ieee802154_local *local = sdata->local; ASSERT_RTNL(); if (mac802154_is_scanning(local)) mac802154_abort_scan_locked(local, sdata); if (mac802154_is_beaconing(local)) mac802154_stop_beacons_locked(local, sdata); netif_stop_queue(dev); local->open_count--; clear_bit(SDATA_STATE_RUNNING, &sdata->state); if (!local->open_count) ieee802154_stop_device(local); return 0; } static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata, struct ieee802154_hdr *hdr, const struct ieee802154_mac_cb *cb) { struct ieee802154_llsec_params params; u8 level; mac802154_llsec_get_params(&sdata->sec, &params); if (!params.enabled && cb->secen_override && cb->secen) return -EINVAL; if (!params.enabled || (cb->secen_override && !cb->secen) || !params.out_level) return 0; if (cb->seclevel_override && !cb->seclevel) return -EINVAL; level = cb->seclevel_override ? 
cb->seclevel : params.out_level; hdr->fc.security_enabled = 1; hdr->sec.level = level; hdr->sec.key_id_mode = params.out_key.mode; if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX) hdr->sec.short_src = params.out_key.short_source; else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX) hdr->sec.extended_src = params.out_key.extended_source; hdr->sec.key_id = params.out_key.id; return 0; } static int ieee802154_header_create(struct sk_buff *skb, struct net_device *dev, const struct ieee802154_addr *daddr, const struct ieee802154_addr *saddr, unsigned len) { struct ieee802154_hdr hdr; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct ieee802154_mac_cb *cb = mac_cb(skb); int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); hdr.fc.type = cb->type; hdr.fc.security_enabled = cb->secen; hdr.fc.ack_request = cb->ackreq; hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; if (mac802154_set_header_security(sdata, &hdr, cb) < 0) return -EINVAL; if (!saddr) { if (wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_BROADCAST) || wpan_dev->short_addr == cpu_to_le16(IEEE802154_ADDR_UNDEF) || wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) { hdr.source.mode = IEEE802154_ADDR_LONG; hdr.source.extended_addr = wpan_dev->extended_addr; } else { hdr.source.mode = IEEE802154_ADDR_SHORT; hdr.source.short_addr = wpan_dev->short_addr; } hdr.source.pan_id = wpan_dev->pan_id; } else { hdr.source = *(const struct ieee802154_addr *)saddr; } hdr.dest = *(const struct ieee802154_addr *)daddr; hlen = ieee802154_hdr_push(skb, &hdr); if (hlen < 0) return -EINVAL; skb_reset_mac_header(skb); skb->mac_len = hlen; if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; } static const struct wpan_dev_header_ops ieee802154_header_ops = { .create = ieee802154_header_create, }; /* This header create functionality assumes a 8 byte array for * source and 
destination pointer at maximum. To adapt this for * the 802.15.4 dataframe header we use extended address handling * here only and intra pan connection. fc fields are mostly fallback * handling. For provide dev_hard_header for dgram sockets. */ static int mac802154_header_create(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len) { struct ieee802154_hdr hdr; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct ieee802154_mac_cb cb = { }; int hlen; if (!daddr) return -EINVAL; memset(&hdr.fc, 0, sizeof(hdr.fc)); hdr.fc.type = IEEE802154_FC_TYPE_DATA; hdr.fc.ack_request = wpan_dev->ackreq; hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; /* TODO currently a workaround to give zero cb block to set * security parameters defaults according MIB. */ if (mac802154_set_header_security(sdata, &hdr, &cb) < 0) return -EINVAL; hdr.dest.pan_id = wpan_dev->pan_id; hdr.dest.mode = IEEE802154_ADDR_LONG; ieee802154_be64_to_le64(&hdr.dest.extended_addr, daddr); hdr.source.pan_id = hdr.dest.pan_id; hdr.source.mode = IEEE802154_ADDR_LONG; if (!saddr) hdr.source.extended_addr = wpan_dev->extended_addr; else ieee802154_be64_to_le64(&hdr.source.extended_addr, saddr); hlen = ieee802154_hdr_push(skb, &hdr); if (hlen < 0) return -EINVAL; skb_reset_mac_header(skb); skb->mac_len = hlen; if (len > ieee802154_max_payload(&hdr)) return -EMSGSIZE; return hlen; } static int mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) { struct ieee802154_hdr hdr; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) { pr_debug("malformed packet\n"); return 0; } if (hdr.source.mode == IEEE802154_ADDR_LONG) { ieee802154_le64_to_be64(haddr, &hdr.source.extended_addr); return IEEE802154_EXTENDED_ADDR_LEN; } return 0; } static const struct header_ops mac802154_header_ops = { .create = mac802154_header_create, .parse = mac802154_header_parse, }; static 
const struct net_device_ops mac802154_wpan_ops = { .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = ieee802154_subif_start_xmit, .ndo_do_ioctl = mac802154_wpan_ioctl, .ndo_set_mac_address = mac802154_wpan_mac_addr, }; static const struct net_device_ops mac802154_monitor_ops = { .ndo_open = mac802154_wpan_open, .ndo_stop = mac802154_slave_close, .ndo_start_xmit = ieee802154_monitor_start_xmit, }; static void mac802154_wpan_free(struct net_device *dev) { struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); mac802154_llsec_destroy(&sdata->sec); } static void ieee802154_if_setup(struct net_device *dev) { dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN); /* Let hard_header_len set to IEEE802154_MIN_HEADER_LEN. AF_PACKET * will not send frames without any payload, but ack frames * has no payload, so substract one that we can send a 3 bytes * frame. The xmit callback assumes at least a hard header where two * bytes fc and sequence field are set. */ dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1; /* The auth_tag header is for security and places in private payload * room of mac frame which stucks between payload and FCS field. */ dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN + IEEE802154_FCS_LEN; /* The mtu size is the payload without mac header in this case. * We have a dynamic length header with a minimum header length * which is hard_header_len. In this case we let mtu to the size * of maximum payload which is IEEE802154_MTU - IEEE802154_FCS_LEN - * hard_header_len. The FCS which is set by hardware or ndo_start_xmit * and the minimum mac header which can be evaluated inside driver * layer. The rest of mac header will be part of payload if greater * than hard_header_len. 
*/ dev->mtu = IEEE802154_MTU - IEEE802154_FCS_LEN - dev->hard_header_len; dev->tx_queue_len = 300; dev->flags = IFF_NOARP | IFF_BROADCAST; } static int ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype type) { struct wpan_dev *wpan_dev = &sdata->wpan_dev; int ret; u8 tmp; /* set some type-dependent values */ sdata->wpan_dev.iftype = type; get_random_bytes(&tmp, sizeof(tmp)); atomic_set(&wpan_dev->bsn, tmp); get_random_bytes(&tmp, sizeof(tmp)); atomic_set(&wpan_dev->dsn, tmp); /* defaults per 802.15.4-2011 */ wpan_dev->min_be = 3; wpan_dev->max_be = 5; wpan_dev->csma_retries = 4; wpan_dev->frame_retries = 3; wpan_dev->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); switch (type) { case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ieee802154_be64_to_le64(&wpan_dev->extended_addr, sdata->dev->dev_addr); sdata->dev->header_ops = &mac802154_header_ops; sdata->dev->needs_free_netdev = true; sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; sdata->iface_default_filtering = IEEE802154_FILTERING_4_FRAME_FIELDS; wpan_dev->header_ops = &ieee802154_header_ops; mutex_init(&sdata->sec_mtx); mac802154_llsec_init(&sdata->sec); ret = mac802154_wpan_update_llsec(sdata->dev); if (ret < 0) return ret; break; case NL802154_IFTYPE_MONITOR: sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; sdata->iface_default_filtering = IEEE802154_FILTERING_NONE; break; default: BUG(); } return 0; } struct net_device * ieee802154_if_add(struct ieee802154_local *local, const char *name, unsigned char name_assign_type, enum nl802154_iftype type, __le64 extended_addr) { u8 addr[IEEE802154_EXTENDED_ADDR_LEN]; struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; int ret; ASSERT_RTNL(); ndev = alloc_netdev(sizeof(*sdata), name, name_assign_type, 
ieee802154_if_setup); if (!ndev) return ERR_PTR(-ENOMEM); ndev->needed_headroom = local->hw.extra_tx_headroom + IEEE802154_MAX_HEADER_LEN; ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) goto err; ieee802154_le64_to_be64(ndev->perm_addr, &local->hw.phy->perm_extended_addr); switch (type) { case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { ieee802154_le64_to_be64(addr, &extended_addr); dev_addr_set(ndev, addr); } else { dev_addr_set(ndev, ndev->perm_addr); } break; case NL802154_IFTYPE_MONITOR: ndev->type = ARPHRD_IEEE802154_MONITOR; break; default: ret = -EINVAL; goto err; } /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); if (ret) goto err; ret = register_netdevice(ndev); if (ret < 0) goto err; mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); return ndev; err: free_netdev(ndev); return ERR_PTR(ret); } void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) { ASSERT_RTNL(); mutex_lock(&sdata->local->iflist_mtx); if (list_empty(&sdata->local->interfaces)) { mutex_unlock(&sdata->local->iflist_mtx); return; } list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); synchronize_rcu(); unregister_netdevice(sdata->dev); } void ieee802154_remove_interfaces(struct ieee802154_local *local) { struct ieee802154_sub_if_data *sdata, *tmp; mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); unregister_netdevice(sdata->dev); } 
mutex_unlock(&local->iflist_mtx); } static int netdev_notify(struct notifier_block *nb, unsigned long state, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee802154_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) return NOTIFY_DONE; if (!dev->ieee802154_ptr || !dev->ieee802154_ptr->wpan_phy) return NOTIFY_DONE; if (dev->ieee802154_ptr->wpan_phy->privid != mac802154_wpan_phy_privid) return NOTIFY_DONE; sdata = IEEE802154_DEV_TO_SUB_IF(dev); memcpy(sdata->name, dev->name, IFNAMSIZ); return NOTIFY_OK; } static struct notifier_block mac802154_netdev_notifier = { .notifier_call = netdev_notify, }; int ieee802154_iface_init(void) { return register_netdevice_notifier(&mac802154_netdev_notifier); } void ieee802154_iface_exit(void) { unregister_netdevice_notifier(&mac802154_netdev_notifier); }
2 5 83 1 3 79 3 3 9 83 2 2 2 2 2 2 533 524 7 42 10 40 1 497 113 114 40 13 8 7 8 15 23 22 9 13 37 37 74 3 70 9 83 83 83 1 82 9 9 9 1 9 8 1 44 3 1 1 39 1 28 9 20 17 38 49 48 17 46 26 7 7 1 6 6 1 40 17 25 31 9 33 8 14 26 39 32 33 27 6 23 23 8 33 34 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/errno.h> #include <linux/kmod.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/device.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include 
"tty.h" #undef LDISC_DEBUG_HANGUP #ifdef LDISC_DEBUG_HANGUP #define tty_ldisc_debug(tty, f, args...) tty_debug(tty, f, ##args) #else #define tty_ldisc_debug(tty, f, args...) #endif /* lockdep nested classes for tty->ldisc_sem */ enum { LDISC_SEM_NORMAL, LDISC_SEM_OTHER, }; /* * This guards the refcounted line discipline lists. The lock * must be taken with irqs off because there are hangup path * callers who will do ldisc lookups and cannot sleep. */ static DEFINE_RAW_SPINLOCK(tty_ldiscs_lock); /* Line disc dispatch table */ static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; /** * tty_register_ldisc - install a line discipline * @new_ldisc: pointer to the ldisc object * * Installs a new line discipline into the kernel. The discipline is set up as * unreferenced and then made available to the kernel from this point onwards. * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ int tty_register_ldisc(struct tty_ldisc_ops *new_ldisc) { unsigned long flags; if (new_ldisc->num < N_TTY || new_ldisc->num >= NR_LDISCS) return -EINVAL; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[new_ldisc->num] = new_ldisc; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return 0; } EXPORT_SYMBOL(tty_register_ldisc); /** * tty_unregister_ldisc - unload a line discipline * @ldisc: ldisc number * * Remove a line discipline from the kernel providing it is not currently in * use. 
* * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[ldisc->num] = NULL; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } EXPORT_SYMBOL(tty_unregister_ldisc); static struct tty_ldisc_ops *get_ldops(int disc) { unsigned long flags; struct tty_ldisc_ops *ldops, *ret; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); ret = ERR_PTR(-EINVAL); ldops = tty_ldiscs[disc]; if (ldops) { ret = ERR_PTR(-EAGAIN); if (try_module_get(ldops->owner)) ret = ldops; } raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return ret; } static void put_ldops(struct tty_ldisc_ops *ldops) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); module_put(ldops->owner); raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD); /** * tty_ldisc_get - take a reference to an ldisc * @tty: tty device * @disc: ldisc number * * Takes a reference to a line discipline. Deals with refcounts and module * locking counts. If the discipline is not available, its module loaded, if * possible. * * Returns: * * -%EINVAL if the discipline index is not [%N_TTY .. %NR_LDISCS] or if the * discipline is not registered * * -%EAGAIN if request_module() failed to load or register the discipline * * -%ENOMEM if allocation failure * * Otherwise, returns a pointer to the discipline and bumps the ref count * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; if (disc < N_TTY || disc >= NR_LDISCS) return ERR_PTR(-EINVAL); /* * Get the ldisc ops - we may need to request them to be loaded * dynamically and try again. 
*/ ldops = get_ldops(disc); if (IS_ERR(ldops)) { if (!capable(CAP_SYS_MODULE) && !tty_ldisc_autoload) return ERR_PTR(-EPERM); request_module("tty-ldisc-%d", disc); ldops = get_ldops(disc); if (IS_ERR(ldops)) return ERR_CAST(ldops); } /* * There is no way to handle allocation failure of only 16 bytes. * Let's simplify error handling and save more memory. */ ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL | __GFP_NOFAIL); ld->ops = ldops; ld->tty = tty; return ld; } /** * tty_ldisc_put - release the ldisc * @ld: lisdsc to release * * Complement of tty_ldisc_get(). */ static void tty_ldisc_put(struct tty_ldisc *ld) { if (WARN_ON_ONCE(!ld)) return; put_ldops(ld->ops); kfree(ld); } static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; } static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return (*pos < NR_LDISCS) ? pos : NULL; } static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) { } static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; struct tty_ldisc_ops *ldops; ldops = get_ldops(i); if (IS_ERR(ldops)) return 0; seq_printf(m, "%-10s %2d\n", ldops->name ? ldops->name : "???", i); put_ldops(ldops); return 0; } const struct seq_operations tty_ldiscs_seq_ops = { .start = tty_ldiscs_seq_start, .next = tty_ldiscs_seq_next, .stop = tty_ldiscs_seq_stop, .show = tty_ldiscs_seq_show, }; /** * tty_ldisc_ref_wait - wait for the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then wait patiently until it changes. * * Returns: %NULL if the tty has been hungup and not re-opened with a new file * descriptor, otherwise valid ldisc reference * * Note 1: Must not be called from an IRQ/timer context. 
The caller must also * be careful not to hold other locks that will deadlock against a discipline * change, such as an existing ldisc reference (which we check for). * * Note 2: a file_operations routine (read/poll/write) should use this function * to wait for any ldisc lifetime events to finish. */ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { struct tty_ldisc *ld; ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); /** * tty_ldisc_ref - get the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then return %NULL. Can be called from IRQ * and timer functions. */ struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { struct tty_ldisc *ld = NULL; if (ldsem_down_read_trylock(&tty->ldisc_sem)) { ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); } return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref); /** * tty_ldisc_deref - free a tty ldisc reference * @ld: reference to free up * * Undoes the effect of tty_ldisc_ref() or tty_ldisc_ref_wait(). May be called * in IRQ context. 
*/ void tty_ldisc_deref(struct tty_ldisc *ld) { ldsem_up_read(&ld->tty->ldisc_sem); } EXPORT_SYMBOL_GPL(tty_ldisc_deref); static inline int __tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write(&tty->ldisc_sem, timeout); } static inline int __tty_ldisc_lock_nested(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write_nested(&tty->ldisc_sem, LDISC_SEM_OTHER, timeout); } static inline void __tty_ldisc_unlock(struct tty_struct *tty) { ldsem_up_write(&tty->ldisc_sem); } int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; /* Kindly asking blocked readers to release the read side */ set_bit(TTY_LDISC_CHANGING, &tty->flags); wake_up_interruptible_all(&tty->read_wait); wake_up_interruptible_all(&tty->write_wait); ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); return 0; } void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); /* Can be cleared here - ldisc_unlock will wake up writers firstly */ clear_bit(TTY_LDISC_CHANGING, &tty->flags); __tty_ldisc_unlock(tty); } static int tty_ldisc_lock_pair_timeout(struct tty_struct *tty, struct tty_struct *tty2, unsigned long timeout) { int ret; if (tty < tty2) { ret = __tty_ldisc_lock(tty, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty2, timeout); if (!ret) __tty_ldisc_unlock(tty); } } else { /* if this is possible, it has lots of implications */ WARN_ON_ONCE(tty == tty2); if (tty2 && tty != tty2) { ret = __tty_ldisc_lock(tty2, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty, timeout); if (!ret) __tty_ldisc_unlock(tty2); } } else ret = __tty_ldisc_lock(tty, timeout); } if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); if (tty2) set_bit(TTY_LDISC_HALTED, &tty2->flags); return 0; } static void tty_ldisc_lock_pair(struct tty_struct *tty, struct tty_struct *tty2) { tty_ldisc_lock_pair_timeout(tty, tty2, MAX_SCHEDULE_TIMEOUT); } static void 
tty_ldisc_unlock_pair(struct tty_struct *tty, struct tty_struct *tty2) { __tty_ldisc_unlock(tty); if (tty2) __tty_ldisc_unlock(tty2); } /** * tty_ldisc_flush - flush line discipline queue * @tty: tty to flush ldisc for * * Flush the line discipline queue (if any) and the tty flip buffers for this * @tty. */ void tty_ldisc_flush(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); tty_buffer_flush(tty, ld); if (ld) tty_ldisc_deref(ld); } EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure * @disc: line discipline number * * This is probably overkill for real world processors but they are not on hot * paths so a little discipline won't do any harm. * * The line discipline-related tty_struct fields are reset to prevent the ldisc * driver from re-using stale information for the new ldisc instance. * * Locking: takes termios_rwsem */ static void tty_set_termios_ldisc(struct tty_struct *tty, int disc) { down_write(&tty->termios_rwsem); tty->termios.c_line = disc; up_write(&tty->termios_rwsem); tty->disc_data = NULL; tty->receive_room = 0; } /** * tty_ldisc_open - open a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to open * * A helper opening method. Also a convenient debugging and check point. * * Locking: always called with BTM already held. */ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) { WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); if (ld->ops->open) { int ret; /* BTM here locks versus a hangup event */ ret = ld->ops->open(tty); if (ret) clear_bit(TTY_LDISC_OPEN, &tty->flags); tty_ldisc_debug(tty, "%p: opened\n", ld); return ret; } return 0; } /** * tty_ldisc_close - close a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to close * * A helper close method. Also a convenient debugging and check point. 
*/ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) { lockdep_assert_held_write(&tty->ldisc_sem); WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); clear_bit(TTY_LDISC_OPEN, &tty->flags); if (ld->ops->close) ld->ops->close(tty); tty_ldisc_debug(tty, "%p: closed\n", ld); } /** * tty_ldisc_failto - helper for ldisc failback * @tty: tty to open the ldisc on * @ld: ldisc we are trying to fail back to * * Helper to try and recover a tty when switching back to the old ldisc fails * and we need something attached. */ static int tty_ldisc_failto(struct tty_struct *tty, int ld) { struct tty_ldisc *disc = tty_ldisc_get(tty, ld); int r; lockdep_assert_held_write(&tty->ldisc_sem); if (IS_ERR(disc)) return PTR_ERR(disc); tty->ldisc = disc; tty_set_termios_ldisc(tty, ld); r = tty_ldisc_open(tty, disc); if (r < 0) tty_ldisc_put(disc); return r; } /** * tty_ldisc_restore - helper for tty ldisc change * @tty: tty to recover * @old: previous ldisc * * Restore the previous line discipline or %N_TTY when a line discipline change * fails due to an open error */ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { /* There is an outstanding reference here so this is safe */ if (tty_ldisc_failto(tty, old->ops->num) < 0) { const char *name = tty_name(tty); pr_warn("Falling back ldisc for %s.\n", name); /* * The traditional behaviour is to fall back to N_TTY, we * want to avoid falling back to N_NULL unless we have no * choice to avoid the risk of breaking anything */ if (tty_ldisc_failto(tty, N_TTY) < 0 && tty_ldisc_failto(tty, N_NULL) < 0) panic("Couldn't open N_NULL ldisc for %s.", name); } } /** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @disc: the line discipline number * * Set the discipline of a tty line. Must be called from a process context. 
* The ldisc change logic has to protect itself against any overlapping ldisc
 * change (including on the other end of pty pairs), the close of one side of a
 * tty/pty pair, and eventually hangup.
 *
 * Returns: 0 on success or when @disc is already the current discipline;
 * otherwise the error from tty_ldisc_get(), tty_ldisc_lock(), the driver's
 * ldisc_ok() hook or tty_ldisc_open(), or -EIO when the tty has no ldisc or
 * has been hung up.
 */
int tty_set_ldisc(struct tty_struct *tty, int disc)
{
	int retval;
	struct tty_ldisc *old_ldisc, *new_ldisc;

	/* Get the new instance before taking any lock. */
	new_ldisc = tty_ldisc_get(tty, disc);
	if (IS_ERR(new_ldisc))
		return PTR_ERR(new_ldisc);

	tty_lock(tty);
	retval = tty_ldisc_lock(tty, 5 * HZ);
	if (retval)
		goto err;

	if (!tty->ldisc) {
		retval = -EIO;
		goto out;
	}

	/* Check the no-op case */
	if (tty->ldisc->ops->num == disc)
		goto out;

	if (test_bit(TTY_HUPPED, &tty->flags)) {
		/* We were raced by hangup */
		retval = -EIO;
		goto out;
	}

	/* Optional driver hook; a non-zero result aborts the switch. */
	if (tty->ops->ldisc_ok) {
		retval = tty->ops->ldisc_ok(tty, disc);
		if (retval)
			goto out;
	}

	old_ldisc = tty->ldisc;

	/* Shutdown the old discipline. */
	tty_ldisc_close(tty, old_ldisc);

	/* Now set up the new line discipline. */
	tty->ldisc = new_ldisc;
	tty_set_termios_ldisc(tty, disc);

	retval = tty_ldisc_open(tty, new_ldisc);
	if (retval < 0) {
		/* Back to the old one or N_TTY if we can't */
		tty_ldisc_put(new_ldisc);
		tty_ldisc_restore(tty, old_ldisc);
	}

	/* Notify the driver only if the discipline number really changed. */
	if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) {
		down_read(&tty->termios_rwsem);
		tty->ops->set_ldisc(tty);
		up_read(&tty->termios_rwsem);
	}

	/*
	 * At this point we hold a reference to the new ldisc and a
	 * reference to the old ldisc, or we hold two references to
	 * the old ldisc (if it was restored as part of error cleanup
	 * above). In either case, releasing a single reference from
	 * the old ldisc is correct.
	 */
	new_ldisc = old_ldisc;
out:
	tty_ldisc_unlock(tty);

	/*
	 * Restart the work queue in case no characters kick it off. Safe if
	 * already running
	 */
	tty_buffer_restart_work(tty->port);
err:
	tty_ldisc_put(new_ldisc);	/* drop the extra reference */
	tty_unlock(tty);
	return retval;
}
EXPORT_SYMBOL_GPL(tty_set_ldisc);

/**
 * tty_ldisc_kill - teardown ldisc
 * @tty: tty being released
 *
 * Perform final close of the ldisc and reset @tty->ldisc
 *
 * Does nothing when @tty->ldisc is already %NULL. The ldisc semaphore must be
 * held for writing (asserted via lockdep).
 */
static void tty_ldisc_kill(struct tty_struct *tty)
{
	lockdep_assert_held_write(&tty->ldisc_sem);
	if (!tty->ldisc)
		return;
	/*
	 * Now kill off the ldisc
	 */
	tty_ldisc_close(tty, tty->ldisc);
	tty_ldisc_put(tty->ldisc);
	/* Force an oops if we mess this up */
	tty->ldisc = NULL;
}

/**
 * tty_reset_termios - reset terminal state
 * @tty: tty to reset
 *
 * Restore a terminal to the driver default state.
 *
 * Locking: takes termios_rwsem for writing around the whole update.
 */
static void tty_reset_termios(struct tty_struct *tty)
{
	down_write(&tty->termios_rwsem);
	tty->termios = tty->driver->init_termios;
	/* Recompute both speeds from the freshly copied termios. */
	tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios);
	tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios);
	up_write(&tty->termios_rwsem);
}

/**
 * tty_ldisc_reinit - reinitialise the tty ldisc
 * @tty: tty to reinit
 * @disc: line discipline to reinitialize
 *
 * Completely reinitialize the line discipline state, by closing the current
 * instance, if there is one, and opening a new instance. If an error occurs
 * opening the new non-%N_TTY instance, the instance is dropped and @tty->ldisc
 * reset to %NULL. The caller can then retry with %N_TTY instead.
*
 * Returns: 0 if successful, otherwise error code < 0
 */
int tty_ldisc_reinit(struct tty_struct *tty, int disc)
{
	struct tty_ldisc *ld;
	int retval;

	lockdep_assert_held_write(&tty->ldisc_sem);
	ld = tty_ldisc_get(tty, disc);
	if (IS_ERR(ld)) {
		/* Failing to get N_TTY is treated as fatal. */
		BUG_ON(disc == N_TTY);
		return PTR_ERR(ld);
	}

	/* Close and drop the current instance only once the new one exists. */
	if (tty->ldisc) {
		tty_ldisc_close(tty, tty->ldisc);
		tty_ldisc_put(tty->ldisc);
	}

	/* switch the line discipline */
	tty->ldisc = ld;
	tty_set_termios_ldisc(tty, disc);
	retval = tty_ldisc_open(tty, tty->ldisc);
	if (retval) {
		tty_ldisc_put(tty->ldisc);
		tty->ldisc = NULL;
	}
	return retval;
}

/**
 * tty_ldisc_hangup - hangup ldisc reset
 * @tty: tty being hung up
 * @reinit: whether to re-initialise the tty
 *
 * Some tty devices reset their termios when they receive a hangup event. In
 * that situation we must also switch back to %N_TTY properly before we reset
 * the termios data.
 *
 * Locking: We can take the ldisc mutex as the rest of the code is careful to
 * allow for this.
 *
 * In the pty pair case this occurs in the close() path of the tty itself so we
 * must be careful about locking rules.
 */
void tty_ldisc_hangup(struct tty_struct *tty, bool reinit)
{
	struct tty_ldisc *ld;

	tty_ldisc_debug(tty, "%p: hangup\n", tty->ldisc);

	/* First pass, under a read reference: flush and notify the ldisc. */
	ld = tty_ldisc_ref(tty);
	if (ld != NULL) {
		if (ld->ops->flush_buffer)
			ld->ops->flush_buffer(tty);
		tty_driver_flush_buffer(tty);
		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
		    ld->ops->write_wakeup)
			ld->ops->write_wakeup(tty);
		if (ld->ops->hangup)
			ld->ops->hangup(tty);
		tty_ldisc_deref(ld);
	}

	wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT);
	wake_up_interruptible_poll(&tty->read_wait, EPOLLIN);

	/*
	 * Shutdown the current line discipline, and reset it to
	 * N_TTY if need be.
	 *
	 * Avoid racing set_ldisc or tty_ldisc_release
	 */
	/* The return value of tty_ldisc_lock() is not checked here. */
	tty_ldisc_lock(tty, MAX_SCHEDULE_TIMEOUT);

	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
		tty_reset_termios(tty);

	if (tty->ldisc) {
		if (reinit) {
			/* Try the termios ldisc, then N_TTY, then N_NULL. */
			if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 &&
			    tty_ldisc_reinit(tty, N_TTY) < 0)
				WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0);
		} else
			tty_ldisc_kill(tty);
	}
	tty_ldisc_unlock(tty);
}

/**
 * tty_ldisc_setup - open line discipline
 * @tty: tty being opened
 * @o_tty: pair tty for pty/tty pairs
 *
 * Called during the initial open of a tty/pty pair in order to set up the line
 * disciplines and bind them to the @tty. This has no locking issues as the
 * device isn't yet active.
 *
 * Returns: 0 on success, otherwise the non-zero result of tty_ldisc_open().
 * If opening @o_tty's ldisc fails, @tty's ldisc is closed again.
 */
int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
{
	int retval = tty_ldisc_open(tty, tty->ldisc);

	if (retval)
		return retval;

	if (o_tty) {
		/*
		 * Called without o_tty->ldisc_sem held, as o_tty has been
		 * just allocated and no one has a reference to it.
		 */
		retval = tty_ldisc_open(o_tty, o_tty->ldisc);
		if (retval) {
			tty_ldisc_close(tty, tty->ldisc);
			return retval;
		}
	}
	return 0;
}

/**
 * tty_ldisc_release - release line discipline
 * @tty: tty being shut down (or one end of pty pair)
 *
 * Called during the final close of a tty or a pty pair in order to shut down
 * the line discipline layer. On exit, each tty's ldisc is %NULL.
 */
void tty_ldisc_release(struct tty_struct *tty)
{
	struct tty_struct *o_tty = tty->link;

	/*
	 * Shutdown this line discipline. As this is the final close,
	 * it does not race with the set_ldisc code path.
	 */

	tty_ldisc_lock_pair(tty, o_tty);
	tty_ldisc_kill(tty);
	if (o_tty)
		tty_ldisc_kill(o_tty);
	tty_ldisc_unlock_pair(tty, o_tty);

	/*
	 * And the memory resources remaining (buffers, termios) will be
	 * disposed of when the kref hits zero
	 */

	tty_ldisc_debug(tty, "released\n");
}

/**
 * tty_ldisc_init - ldisc setup for new tty
 * @tty: tty being allocated
 *
 * Set up the line discipline objects for a newly allocated tty.
Note that the * tty structure is not completely set up when this call is made. */ int tty_ldisc_init(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_get(tty, N_TTY); if (IS_ERR(ld)) return PTR_ERR(ld); tty->ldisc = ld; return 0; } /** * tty_ldisc_deinit - ldisc cleanup for new tty * @tty: tty that was allocated recently * * The tty structure must not be completely set up (tty_ldisc_setup()) when * this call is made. */ void tty_ldisc_deinit(struct tty_struct *tty) { /* no ldisc_sem, tty is being destroyed */ if (tty->ldisc) tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; }
2 2 2 2 2 2 75 76 32 32 70 69 4 4 4 19 19 184 51 6 48 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API for algorithms (i.e., low-level API). * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/algapi.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/fips.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/workqueue.h> #include "internal.h" static LIST_HEAD(crypto_template_list); static inline void crypto_check_module_sig(struct module *mod) { if (fips_enabled && mod && !module_sig_ok(mod)) panic("Module %s signature verification failed in FIPS mode\n", module_name(mod)); } static int crypto_check_alg(struct crypto_alg *alg) { crypto_check_module_sig(alg->cra_module); if (!alg->cra_name[0] || !alg->cra_driver_name[0]) return -EINVAL; if (alg->cra_alignmask & (alg->cra_alignmask + 1)) return -EINVAL; /* General maximums for all algs. */ if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK) return -EINVAL; if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE) return -EINVAL; /* Lower maximums for specific alg types. 
*/ if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) { if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK) return -EINVAL; if (alg->cra_blocksize > MAX_CIPHER_BLOCKSIZE) return -EINVAL; } if (alg->cra_priority < 0) return -EINVAL; refcount_set(&alg->cra_refcnt, 1); return 0; } static void crypto_free_instance(struct crypto_instance *inst) { inst->alg.cra_type->free(inst); } static void crypto_destroy_instance_workfn(struct work_struct *w) { struct crypto_template *tmpl = container_of(w, struct crypto_template, free_work); struct crypto_instance *inst; struct hlist_node *n; HLIST_HEAD(list); down_write(&crypto_alg_sem); hlist_for_each_entry_safe(inst, n, &tmpl->dead, list) { if (refcount_read(&inst->alg.cra_refcnt) != -1) continue; hlist_del(&inst->list); hlist_add_head(&inst->list, &list); } up_write(&crypto_alg_sem); hlist_for_each_entry_safe(inst, n, &list, list) crypto_free_instance(inst); } static void crypto_destroy_instance(struct crypto_alg *alg) { struct crypto_instance *inst = container_of(alg, struct crypto_instance, alg); struct crypto_template *tmpl = inst->tmpl; refcount_set(&alg->cra_refcnt, -1); schedule_work(&tmpl->free_work); } /* * This function adds a spawn to the list secondary_spawns which * will be used at the end of crypto_remove_spawns to unregister * instances, unless the spawn happens to be one that is depended * on by the new algorithm (nalg in crypto_remove_spawns). * * This function is also responsible for resurrecting any algorithms * in the dependency chain of nalg by unsetting n->dead. 
*/
static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
					    struct list_head *stack,
					    struct list_head *top,
					    struct list_head *secondary_spawns)
{
	struct crypto_spawn *spawn, *n;

	/* An empty stack means the walk is finished. */
	spawn = list_first_entry_or_null(stack, struct crypto_spawn, list);
	if (!spawn)
		return NULL;

	n = list_prev_entry(spawn, list);
	list_move(&spawn->list, secondary_spawns);

	/* Nothing left on the stack after the move: continue from @top. */
	if (list_is_last(&n->list, stack))
		return top;

	n = list_next_entry(n, list);
	if (!spawn->dead)
		n->dead = false;

	return &n->inst->alg.cra_users;
}

/*
 * Mark @inst dead and, if it belongs to a template, unlink it, move it to
 * the template's dead list and drop one algorithm reference.
 *
 * Does nothing if the instance is already dead. @list is not used by this
 * function.
 */
static void crypto_remove_instance(struct crypto_instance *inst,
				   struct list_head *list)
{
	struct crypto_template *tmpl = inst->tmpl;

	if (crypto_is_dead(&inst->alg))
		return;

	inst->alg.cra_flags |= CRYPTO_ALG_DEAD;

	if (!tmpl)
		return;

	list_del_init(&inst->alg.cra_list);
	hlist_del(&inst->list);
	hlist_add_head(&inst->list, &tmpl->dead);

	BUG_ON(!list_empty(&inst->alg.cra_users));

	crypto_alg_put(&inst->alg);
}

/*
 * Given an algorithm alg, remove all algorithms that depend on it
 * through spawns.  If nalg is not null, then exempt any algorithms
 * that are depended on by nalg.  This is useful when nalg itself
 * depends on alg.
 */
void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
			  struct crypto_alg *nalg)
{
	u32 new_type = (nalg ?: alg)->cra_flags;
	struct crypto_spawn *spawn, *n;
	LIST_HEAD(secondary_spawns);
	struct list_head *spawns;
	LIST_HEAD(stack);
	LIST_HEAD(top);

	/* Collect the users whose type/mask match new_type onto @top. */
	spawns = &alg->cra_users;
	list_for_each_entry_safe(spawn, n, spawns, list) {
		if ((spawn->alg->cra_flags ^ new_type) & spawn->mask)
			continue;

		list_move(&spawn->list, &top);
	}

	/*
	 * Perform a depth-first walk starting from alg through
	 * the cra_users tree.  The list stack records the path
	 * from alg to the current spawn.
	 */
	spawns = &top;
	do {
		while (!list_empty(spawns)) {
			struct crypto_instance *inst;

			spawn = list_first_entry(spawns, struct crypto_spawn,
						 list);
			inst = spawn->inst;

			list_move(&spawn->list, &stack);
			spawn->dead = !spawn->registered || &inst->alg != nalg;

			if (!spawn->registered)
				break;

			BUG_ON(&inst->alg == alg);

			if (&inst->alg == nalg)
				break;

			spawns = &inst->alg.cra_users;

			/*
			 * Even if spawn->registered is true, the
			 * instance itself may still be unregistered.
			 * This is because it may have failed during
			 * registration.  Therefore we still need to
			 * make the following test.
			 *
			 * We may encounter an unregistered instance here, since
			 * an instance's spawns are set up prior to the instance
			 * being registered.  An unregistered instance will have
			 * NULL ->cra_users.next, since ->cra_users isn't
			 * properly initialized until registration.  But an
			 * unregistered instance cannot have any users, so treat
			 * it the same as ->cra_users being empty.
			 */
			if (spawns->next == NULL)
				break;
		}
	} while ((spawns = crypto_more_spawns(alg, &stack, &top,
					      &secondary_spawns)));

	/*
	 * Remove all instances that are marked as dead.  Also
	 * complete the resurrection of the others by moving them
	 * back to the cra_users list.
	 */
	list_for_each_entry_safe(spawn, n, &secondary_spawns, list) {
		if (!spawn->dead)
			list_move(&spawn->list, &spawn->alg->cra_users);
		else if (spawn->registered)
			crypto_remove_instance(spawn->inst, list);
	}
}
EXPORT_SYMBOL_GPL(crypto_remove_spawns);

/*
 * Finish registering @alg: for every other live, non-larval algorithm with
 * the same cra_name, remove its spawns in favour of @alg, except when that
 * algorithm has a different driver name and a higher priority. Then send
 * the CRYPTO_MSG_ALG_LOADED notification.
 */
static void crypto_alg_finish_registration(struct crypto_alg *alg,
					   struct list_head *algs_to_put)
{
	struct crypto_alg *q;

	list_for_each_entry(q, &crypto_alg_list, cra_list) {
		if (q == alg)
			continue;

		if (crypto_is_moribund(q))
			continue;

		if (crypto_is_larval(q))
			continue;

		if (strcmp(alg->cra_name, q->cra_name))
			continue;

		if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
		    q->cra_priority > alg->cra_priority)
			continue;

		crypto_remove_spawns(q, algs_to_put, alg);
	}

	crypto_notify(CRYPTO_MSG_ALG_LOADED, alg);
}

/*
 * Allocate the larval that stands in for @alg while it is being self-tested.
 *
 * Returns NULL when no self-test is needed (self-tests disabled or @alg is
 * CRYPTO_ALG_INTERNAL), an ERR_PTR on failure, otherwise the new larval
 * holding a module reference on @alg in ->adult.
 */
static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
{
	struct crypto_larval *larval;

	if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) ||
	    (alg->cra_flags & CRYPTO_ALG_INTERNAL))
		return NULL; /* No self-test needed */

	larval = crypto_larval_alloc(alg->cra_name,
				     alg->cra_flags | CRYPTO_ALG_TESTED, 0);
	if (IS_ERR(larval))
		return larval;

	larval->adult = crypto_mod_get(alg);
	if (!larval->adult) {
		kfree(larval);
		return ERR_PTR(-ENOENT);
	}

	refcount_set(&larval->alg.cra_refcnt, 1);
	memcpy(larval->alg.cra_driver_name, alg->cra_driver_name,
	       CRYPTO_MAX_ALG_NAME);
	larval->alg.cra_priority = alg->cra_priority;

	return larval;
}

/*
 * Add @alg to crypto_alg_list.
 *
 * Returns ERR_PTR(-EAGAIN) if @alg is dead, ERR_PTR(-EEXIST) if it is already
 * listed or clashes with an existing name, the error from
 * crypto_alloc_test_larval(), a test larval that was also added to the list,
 * or NULL when no test is needed (the algorithm is then marked tested and
 * registration is finished immediately).
 */
static struct crypto_larval *
__crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put)
{
	struct crypto_alg *q;
	struct crypto_larval *larval;
	int ret = -EAGAIN;

	if (crypto_is_dead(alg))
		goto err;

	INIT_LIST_HEAD(&alg->cra_users);

	ret = -EEXIST;

	list_for_each_entry(q, &crypto_alg_list, cra_list) {
		if (q == alg)
			goto err;

		if (crypto_is_moribund(q))
			continue;

		if (crypto_is_larval(q)) {
			if (!strcmp(alg->cra_driver_name, q->cra_driver_name))
				goto err;
			continue;
		}

		if (!strcmp(q->cra_driver_name, alg->cra_name) ||
		    !strcmp(q->cra_driver_name, alg->cra_driver_name) ||
		    !strcmp(q->cra_name, alg->cra_driver_name))
			goto err;
	}

	larval = crypto_alloc_test_larval(alg);
	if (IS_ERR(larval))
		goto out;

	list_add(&alg->cra_list, &crypto_alg_list);

	if (larval) {
		/* No cheating! */
		alg->cra_flags &= ~CRYPTO_ALG_TESTED;

		list_add(&larval->alg.cra_list, &crypto_alg_list);
	} else {
		alg->cra_flags |= CRYPTO_ALG_TESTED;
		crypto_alg_finish_registration(alg, algs_to_put);
	}

out:
	return larval;

err:
	larval = ERR_PTR(ret);
	goto out;
}

/*
 * Report the self-test result @err for the algorithm whose driver name is
 * @name.
 *
 * Looks up the matching test larval, marks it dead and, unless the adult
 * algorithm is dead or the test failed, marks the adult tested and finishes
 * its registration. -ECANCELED still completes registration but sets
 * CRYPTO_ALG_FIPS_INTERNAL on the algorithm. Waiters on the larval are
 * completed in every case where a larval was found.
 */
void crypto_alg_tested(const char *name, int err)
{
	struct crypto_larval *test;
	struct crypto_alg *alg;
	struct crypto_alg *q;
	LIST_HEAD(list);

	down_write(&crypto_alg_sem);
	list_for_each_entry(q, &crypto_alg_list, cra_list) {
		if (crypto_is_moribund(q) || !crypto_is_larval(q))
			continue;

		test = (struct crypto_larval *)q;

		if (!strcmp(q->cra_driver_name, name))
			goto found;
	}

	pr_err("alg: Unexpected test result for %s: %d\n", name, err);
	up_write(&crypto_alg_sem);
	return;

found:
	q->cra_flags |= CRYPTO_ALG_DEAD;
	alg = test->adult;

	if (crypto_is_dead(alg))
		goto complete;

	if (err == -ECANCELED)
		alg->cra_flags |= CRYPTO_ALG_FIPS_INTERNAL;
	else if (err)
		goto complete;
	else
		alg->cra_flags &= ~CRYPTO_ALG_FIPS_INTERNAL;

	alg->cra_flags |= CRYPTO_ALG_TESTED;

	crypto_alg_finish_registration(alg, &list);

complete:
	list_del_init(&test->alg.cra_list);
	complete_all(&test->completion);

	up_write(&crypto_alg_sem);

	/* The puts are done after the semaphore has been released. */
	crypto_alg_put(&test->alg);
	crypto_remove_final(&list);
}
EXPORT_SYMBOL_GPL(crypto_alg_tested);

/* Unlink every algorithm on @list and drop one reference on each. */
void crypto_remove_final(struct list_head *list)
{
	struct crypto_alg *alg;
	struct crypto_alg *n;

	list_for_each_entry_safe(alg, n, list, cra_list) {
		list_del_init(&alg->cra_list);
		crypto_alg_put(alg);
	}
}
EXPORT_SYMBOL_GPL(crypto_remove_final);

/*
 * cra_destroy hook for algorithms duplicated by crypto_register_alg(): the
 * allocation starts cra_type->algsize bytes before @alg, so free from there.
 */
static void crypto_free_alg(struct crypto_alg *alg)
{
	unsigned int algsize = alg->cra_type->algsize;
	u8 *p = (u8 *)alg - algsize;

	crypto_destroy_alg(alg);
	kfree(p);
}

/*
 * Register @alg.
 *
 * Returns 0 on success, the error from crypto_check_alg(), -ENOMEM if the
 * CRYPTO_ALG_DUP_FIRST copy cannot be allocated, or the error carried by
 * __crypto_register_alg().
 */
int crypto_register_alg(struct crypto_alg *alg)
{
	struct crypto_larval *larval;
	bool test_started = false;
	LIST_HEAD(algs_to_put);
	int err;

	alg->cra_flags &= ~CRYPTO_ALG_DEAD;
	err = crypto_check_alg(alg);
	if (err)
		return err;

	/*
	 * With CRYPTO_ALG_DUP_FIRST, register a heap copy of the enclosing
	 * object (algsize bytes before @alg plus @alg itself) instead of the
	 * caller's structure; the copy is released via crypto_free_alg().
	 */
	if (alg->cra_flags & CRYPTO_ALG_DUP_FIRST &&
	    !WARN_ON_ONCE(alg->cra_destroy)) {
		unsigned int algsize = alg->cra_type->algsize;
		u8 *p = (u8 *)alg - algsize;

		p = kmemdup(p, algsize + sizeof(*alg), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		alg = (void *)(p + algsize);
		alg->cra_destroy = crypto_free_alg;
	}

	down_write(&crypto_alg_sem);
	larval = __crypto_register_alg(alg, &algs_to_put);
	if (!IS_ERR_OR_NULL(larval)) {
		test_started = crypto_boot_test_finished();
		larval->test_started = test_started;
	}
	up_write(&crypto_alg_sem);

	if (IS_ERR(larval)) {
		crypto_alg_put(alg);
		return PTR_ERR(larval);
	}

	if (test_started)
		crypto_schedule_test(larval);
	else
		crypto_remove_final(&algs_to_put);

	return 0;
}
EXPORT_SYMBOL_GPL(crypto_register_alg);

/*
 * Mark @alg dead, unlink it from the algorithm list and remove its spawns.
 *
 * Returns -ENOENT if @alg is not on any list, 0 otherwise.
 */
static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
{
	if (unlikely(list_empty(&alg->cra_list)))
		return -ENOENT;

	alg->cra_flags |= CRYPTO_ALG_DEAD;

	list_del_init(&alg->cra_list);
	crypto_remove_spawns(alg, list, NULL);

	return 0;
}

/*
 * Unregister @alg. Warns and returns early if it was not registered; also
 * warns if an algorithm without a cra_destroy hook still has a refcount
 * other than 1.
 */
void crypto_unregister_alg(struct crypto_alg *alg)
{
	int ret;
	LIST_HEAD(list);

	down_write(&crypto_alg_sem);
	ret = crypto_remove_alg(alg, &list);
	up_write(&crypto_alg_sem);

	if (WARN(ret, "Algorithm %s is not registered", alg->cra_driver_name))
		return;

	WARN_ON(!alg->cra_destroy && refcount_read(&alg->cra_refcnt) != 1);

	/* Put @alg itself together with the dependants collected above. */
	list_add(&alg->cra_list, &list);
	crypto_remove_final(&list);
}
EXPORT_SYMBOL_GPL(crypto_unregister_alg);

/*
 * Register @count algorithms from the array @algs. On the first failure the
 * already registered entries are unregistered in reverse order and the error
 * is returned.
 */
int crypto_register_algs(struct crypto_alg *algs, int count)
{
	int i, ret;

	for (i = 0; i < count; i++) {
		ret = crypto_register_alg(&algs[i]);
		if (ret)
			goto err;
	}

	return 0;

err:
	for (--i; i >= 0; --i)
		crypto_unregister_alg(&algs[i]);

	return ret;
}
EXPORT_SYMBOL_GPL(crypto_register_algs);

/* Unregister @count algorithms from the array @algs, in array order. */
void crypto_unregister_algs(struct crypto_alg *algs, int count)
{
	int i;

	for (i = 0; i < count; i++)
		crypto_unregister_alg(&algs[i]);
}
EXPORT_SYMBOL_GPL(crypto_unregister_algs);

/*
 * Add @tmpl to the template list.
 *
 * Returns 0 on success or -EEXIST if this template is already on the list.
 */
int crypto_register_template(struct crypto_template *tmpl)
{
	struct crypto_template *q;
	int err = -EEXIST;

	INIT_WORK(&tmpl->free_work, crypto_destroy_instance_workfn);

	down_write(&crypto_alg_sem);

	crypto_check_module_sig(tmpl->module);

	list_for_each_entry(q, &crypto_template_list, list) {
		if (q == tmpl)
			goto out;
	}

	list_add(&tmpl->list, &crypto_template_list);
	err = 0;
out:
	up_write(&crypto_alg_sem);
	return err;
}
EXPORT_SYMBOL_GPL(crypto_register_template);

/*
 * Register @count templates from the array @tmpls, unregistering the ones
 * already added (in reverse order) if any registration fails.
 */
int crypto_register_templates(struct crypto_template *tmpls, int count)
{
	int i, err;

	for (i = 0; i < count; i++) {
		err = crypto_register_template(&tmpls[i]);
		if (err)
			goto out;
	}
	return 0;

out:
	for (--i; i >= 0; --i)
		crypto_unregister_template(&tmpls[i]);
	return err;
}
EXPORT_SYMBOL_GPL(crypto_register_templates);

/*
 * Remove @tmpl from the template list, remove and free all of its instances,
 * and wait for any pending free_work to finish.
 */
void crypto_unregister_template(struct crypto_template *tmpl)
{
	struct crypto_instance *inst;
	struct hlist_node *n;
	struct hlist_head *list;
	LIST_HEAD(users);

	down_write(&crypto_alg_sem);

	BUG_ON(list_empty(&tmpl->list));
	list_del_init(&tmpl->list);

	list = &tmpl->instances;
	hlist_for_each_entry(inst, list, list) {
		int err = crypto_remove_alg(&inst->alg, &users);

		BUG_ON(err);
	}

	up_write(&crypto_alg_sem);

	/* Each remaining instance must hold exactly one reference here. */
	hlist_for_each_entry_safe(inst, n, list, list) {
		BUG_ON(refcount_read(&inst->alg.cra_refcnt) != 1);
		crypto_free_instance(inst);
	}
	crypto_remove_final(&users);

	flush_work(&tmpl->free_work);
}
EXPORT_SYMBOL_GPL(crypto_unregister_template);

/* Unregister @count templates from the array @tmpls, last entry first. */
void crypto_unregister_templates(struct crypto_template *tmpls, int count)
{
	int i;

	for (i = count - 1; i >= 0; --i)
		crypto_unregister_template(&tmpls[i]);
}
EXPORT_SYMBOL_GPL(crypto_unregister_templates);

/*
 * Find the template called @name and take a reference on it with
 * crypto_tmpl_get(). Returns NULL if there is no such template or no
 * reference could be taken.
 */
static struct crypto_template *__crypto_lookup_template(const char *name)
{
	struct crypto_template *q, *tmpl = NULL;

	down_read(&crypto_alg_sem);
	list_for_each_entry(q, &crypto_template_list, list) {
		if (strcmp(q->name, name))
			continue;
		if (unlikely(!crypto_tmpl_get(q)))
			continue;

		tmpl = q;
		break;
	}
	up_read(&crypto_alg_sem);

	return tmpl;
}

/*
 * Look up the template @name, requesting the module "crypto-<name>" and
 * retrying if the first lookup fails.
 */
struct crypto_template *crypto_lookup_template(const char *name)
{
	return try_then_request_module(__crypto_lookup_template(name),
				       "crypto-%s", name);
}
EXPORT_SYMBOL_GPL(crypto_lookup_template);

/*
 * Register @inst as an instance of @tmpl.
 *
 * Returns 0 on success, the error from crypto_check_alg(), -EAGAIN if one of
 * the instance's spawns is already dead, or the error carried by
 * __crypto_register_alg().
 */
int crypto_register_instance(struct crypto_template *tmpl,
			     struct crypto_instance *inst)
{
	struct crypto_larval *larval;
	struct crypto_spawn *spawn;
	u32 fips_internal = 0;
	LIST_HEAD(algs_to_put);
	int err;

	err = crypto_check_alg(&inst->alg);
	if (err)
		return err;

	inst->alg.cra_module = tmpl->module;
	inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
	inst->alg.cra_destroy = crypto_destroy_instance;

	down_write(&crypto_alg_sem);

	larval = ERR_PTR(-EAGAIN);
	/*
	 * Bind every spawn to the instance, mark it registered and drop the
	 * module reference held on its algorithm.
	 */
	for (spawn = inst->spawns; spawn;) {
		struct crypto_spawn *next;

		if (spawn->dead)
			goto unlock;

		next = spawn->next;
		spawn->inst = inst;
		spawn->registered = true;

		fips_internal |= spawn->alg->cra_flags;

		crypto_mod_put(spawn->alg);

		spawn = next;
	}

	/* Inherit FIPS_INTERNAL if any underlying algorithm has it set. */
	inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);

	larval = __crypto_register_alg(&inst->alg, &algs_to_put);
	if (IS_ERR(larval))
		goto unlock;
	else if (larval)
		larval->test_started = true;

	hlist_add_head(&inst->list, &tmpl->instances);
	inst->tmpl = tmpl;

unlock:
	up_write(&crypto_alg_sem);

	if (IS_ERR(larval))
		return PTR_ERR(larval);

	if (larval)
		crypto_schedule_test(larval);
	else
		crypto_remove_final(&algs_to_put);

	return 0;
}
EXPORT_SYMBOL_GPL(crypto_register_instance);

/* Remove @inst and everything that depends on it through spawns. */
void crypto_unregister_instance(struct crypto_instance *inst)
{
	LIST_HEAD(list);

	down_write(&crypto_alg_sem);

	crypto_remove_spawns(&inst->alg, &list, NULL);
	crypto_remove_instance(inst, &list);

	up_write(&crypto_alg_sem);

	crypto_remove_final(&list);
}
EXPORT_SYMBOL_GPL(crypto_unregister_instance);

/*
 * Look up the algorithm @name and attach it to @spawn as a dependency of
 * @inst.
 *
 * Returns 0 on success, -EINVAL if @inst is NULL, the error encoded in @name
 * or returned by crypto_find_alg(), or -EAGAIN if the algorithm found is
 * moribund (its module reference is dropped again in that case).
 */
int crypto_grab_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst,
		      const char *name, u32 type, u32 mask)
{
	struct crypto_alg *alg;
	int err = -EAGAIN;

	if (WARN_ON_ONCE(inst == NULL))
		return -EINVAL;

	/* Allow the result of crypto_attr_alg_name() to be passed directly */
	if (IS_ERR(name))
		return PTR_ERR(name);

	alg = crypto_find_alg(name, spawn->frontend,
			      type | CRYPTO_ALG_FIPS_INTERNAL, mask);
	if (IS_ERR(alg))
		return PTR_ERR(alg);

	down_write(&crypto_alg_sem);
	if (!crypto_is_moribund(alg)) {
		list_add(&spawn->list, &alg->cra_users);
		spawn->alg = alg;
		spawn->mask = mask;
		/* Push the spawn onto the instance's singly linked list. */
		spawn->next = inst->spawns;
		inst->spawns = spawn;
		inst->alg.cra_flags |=
			(alg->cra_flags & CRYPTO_ALG_INHERITED_FLAGS);
		err = 0;
	}
	up_write(&crypto_alg_sem);
	if (err)
		crypto_mod_put(alg);
	return err;
}
EXPORT_SYMBOL_GPL(crypto_grab_spawn);

/*
 * Release @spawn: unlink it from its algorithm's user list unless it is
 * already dead, and drop the module reference if the spawn was never
 * registered. A spawn with no algorithm attached is ignored.
 */
void crypto_drop_spawn(struct crypto_spawn *spawn)
{
	if (!spawn->alg) /* not yet initialized? */
		return;

	down_write(&crypto_alg_sem);
	if (!spawn->dead)
		list_del(&spawn->list);
	up_write(&crypto_alg_sem);

	if (!spawn->registered)
		crypto_mod_put(spawn->alg);
}
EXPORT_SYMBOL_GPL(crypto_drop_spawn);

/*
 * Take a module reference on the algorithm behind @spawn.
 *
 * Returns the algorithm, or ERR_PTR(-EAGAIN) if the spawn is dead or the
 * reference could not be taken; in the latter case the algorithm is passed
 * to crypto_shoot_alg() after the semaphore has been released.
 */
static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn)
{
	struct crypto_alg *alg = ERR_PTR(-EAGAIN);
	struct crypto_alg *target;
	bool shoot = false;

	down_read(&crypto_alg_sem);
	if (!spawn->dead) {
		alg = spawn->alg;
		if (!crypto_mod_get(alg)) {
			/* Keep the algorithm alive until it has been shot. */
			target = crypto_alg_get(alg);
			shoot = true;
			alg = ERR_PTR(-EAGAIN);
		}
	}
	up_read(&crypto_alg_sem);

	if (shoot) {
		crypto_shoot_alg(target);
		crypto_alg_put(target);
	}

	return alg;
}

/*
 * Allocate a transform for the algorithm behind @spawn.
 *
 * Returns the transform, the error from crypto_spawn_alg(), ERR_PTR(-EINVAL)
 * if the algorithm's flags do not match @type under @mask, or the error from
 * __crypto_alloc_tfm(). The module reference is dropped on failure.
 */
struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
				    u32 mask)
{
	struct crypto_alg *alg;
	struct crypto_tfm *tfm;

	alg = crypto_spawn_alg(spawn);
	if (IS_ERR(alg))
		return ERR_CAST(alg);

	tfm = ERR_PTR(-EINVAL);
	if (unlikely((alg->cra_flags ^ type) & mask))
		goto out_put_alg;

	tfm = __crypto_alloc_tfm(alg, type, mask);
	if (IS_ERR(tfm))
		goto out_put_alg;

	return tfm;

out_put_alg:
	crypto_mod_put(alg);
	return tfm;
}
EXPORT_SYMBOL_GPL(crypto_spawn_tfm);

/*
 * Like crypto_spawn_tfm(), but creates the transform with
 * crypto_create_tfm() using the spawn's frontend. The module reference is
 * dropped on failure.
 */
void *crypto_spawn_tfm2(struct crypto_spawn *spawn)
{
	struct crypto_alg *alg;
	struct crypto_tfm *tfm;

	alg = crypto_spawn_alg(spawn);
	if (IS_ERR(alg))
		return ERR_CAST(alg);

	tfm = crypto_create_tfm(alg, spawn->frontend);
	if (IS_ERR(tfm))
		goto out_put_alg;

	return tfm;

out_put_alg:
	crypto_mod_put(alg);
	return tfm;
}
EXPORT_SYMBOL_GPL(crypto_spawn_tfm2);
/* Add @nb to crypto_chain; returns blocking_notifier_chain_register()'s result. */
int crypto_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&crypto_chain, nb);
}
EXPORT_SYMBOL_GPL(crypto_register_notifier);

/* Remove @nb from crypto_chain; returns blocking_notifier_chain_unregister()'s result. */
int crypto_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&crypto_chain, nb);
}
EXPORT_SYMBOL_GPL(crypto_unregister_notifier);

/*
 * crypto_get_attr_type() - fetch the type attribute from template parameters.
 *
 * Looks only at tb[0].  Returns ERR_PTR(-ENOENT) when it is NULL and
 * ERR_PTR(-EINVAL) when its payload is smaller than struct crypto_attr_type
 * or its rta_type is not CRYPTOA_TYPE.  Otherwise returns a pointer into the
 * attribute's payload (no copy is made).
 */
struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb)
{
	struct rtattr *rta = tb[0];
	struct crypto_attr_type *algt;

	if (!rta)
		return ERR_PTR(-ENOENT);
	if (RTA_PAYLOAD(rta) < sizeof(*algt))
		return ERR_PTR(-EINVAL);
	if (rta->rta_type != CRYPTOA_TYPE)
		return ERR_PTR(-EINVAL);

	algt = RTA_DATA(rta);

	return algt;
}
EXPORT_SYMBOL_GPL(crypto_get_attr_type);

/**
 * crypto_check_attr_type() - check algorithm type and compute inherited mask
 * @tb: the template parameters
 * @type: the algorithm type the template would be instantiated as
 * @mask_ret: (output) the mask that should be passed to crypto_grab_*()
 *	      to restrict the flags of any inner algorithms
 *
 * Validate that the algorithm type the user requested is compatible with the
 * one the template would actually be instantiated as.  E.g., if the user is
 * doing crypto_alloc_shash("cbc(aes)", ...), this would return an error because
 * the "cbc" template creates an "skcipher" algorithm, not an "shash" algorithm.
 *
 * Also compute the mask to use to restrict the flags of any inner algorithms.
 *
 * Return: 0 on success; -errno on failure
 */
int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret)
{
	struct crypto_attr_type *algt;

	algt = crypto_get_attr_type(tb);
	if (IS_ERR(algt))
		return PTR_ERR(algt);

	/* *mask_ret is left untouched on every error path */
	if ((algt->type ^ type) & algt->mask)
		return -EINVAL;

	*mask_ret = crypto_algt_inherited_mask(algt);
	return 0;
}
EXPORT_SYMBOL_GPL(crypto_check_attr_type);

/*
 * crypto_attr_alg_name() - return the algorithm name carried by @rta.
 *
 * Returns ERR_PTR(-ENOENT) for a NULL @rta and ERR_PTR(-EINVAL) when the
 * payload is smaller than struct crypto_attr_alg or the attribute type is
 * not CRYPTOA_ALG.  On success the returned string points into the
 * attribute payload.
 */
const char *crypto_attr_alg_name(struct rtattr *rta)
{
	struct crypto_attr_alg *alga;

	if (!rta)
		return ERR_PTR(-ENOENT);
	if (RTA_PAYLOAD(rta) < sizeof(*alga))
		return ERR_PTR(-EINVAL);
	if (rta->rta_type != CRYPTOA_ALG)
		return ERR_PTR(-EINVAL);

	alga = RTA_DATA(rta);
	/* Force termination in place: this writes into the caller's attribute. */
	alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;

	return alga->name;
}
EXPORT_SYMBOL_GPL(crypto_attr_alg_name);

/*
 * __crypto_inst_setname() - derive an instance's names from a template and
 * an inner algorithm.
 *
 * Writes "<name>(<alg->cra_name>)" to inst->alg.cra_name and
 * "<driver>(<alg->cra_driver_name>)" to inst->alg.cra_driver_name.
 * Returns -ENAMETOOLONG if either result does not fit in
 * CRYPTO_MAX_ALG_NAME bytes, 0 otherwise.
 */
int __crypto_inst_setname(struct crypto_instance *inst, const char *name,
			  const char *driver, struct crypto_alg *alg)
{
	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name,
		     alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
		     driver, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	return 0;
}
EXPORT_SYMBOL_GPL(__crypto_inst_setname);

/*
 * crypto_init_queue() - set up an empty request queue holding at most
 * @max_qlen requests before the backlog logic kicks in.
 *
 * backlog pointing at the list head itself is the "nothing backlogged"
 * state that the enqueue/dequeue helpers below test for.
 */
void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen)
{
	INIT_LIST_HEAD(&queue->list);
	queue->backlog = &queue->list;
	queue->qlen = 0;
	queue->max_qlen = max_qlen;
}
EXPORT_SYMBOL_GPL(crypto_init_queue);

/*
 * crypto_enqueue_request() - append @request to the tail of @queue.
 *
 * Returns -EINPROGRESS when the request was queued below the limit,
 * -EBUSY when the queue was already at max_qlen but the request carries
 * CRYPTO_TFM_REQ_MAY_BACKLOG (it is still queued), and -ENOSPC when the
 * queue was full and the request may not be backlogged (it is not queued).
 */
int crypto_enqueue_request(struct crypto_queue *queue,
			   struct crypto_async_request *request)
{
	int err = -EINPROGRESS;

	if (unlikely(queue->qlen >= queue->max_qlen)) {
		if (!(request->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
			err = -ENOSPC;
			goto out;
		}
		err = -EBUSY;
		/* remember the first backlogged request only */
		if (queue->backlog == &queue->list)
			queue->backlog = &request->list;
	}

	queue->qlen++;
	list_add_tail(&request->list, &queue->list);

out:
	return err;
}
EXPORT_SYMBOL_GPL(crypto_enqueue_request);

/*
 * crypto_enqueue_request_head() - insert @request at the front of @queue.
 *
 * Never fails.  When the queue is already at or above max_qlen the backlog
 * pointer is moved one entry towards the head before the insertion.
 */
void crypto_enqueue_request_head(struct crypto_queue *queue,
				 struct crypto_async_request *request)
{
	if (unlikely(queue->qlen >= queue->max_qlen))
		queue->backlog = queue->backlog->prev;

	queue->qlen++;
	list_add(&request->list, &queue->list);
}
EXPORT_SYMBOL_GPL(crypto_enqueue_request_head);

/*
 * crypto_dequeue_request() - remove and return the request at the head of
 * @queue, or NULL when qlen is zero.
 *
 * If a backlog position is recorded it is advanced by one entry before the
 * head is unlinked.
 */
struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
{
	struct list_head *request;

	if (unlikely(!queue->qlen))
		return NULL;

	queue->qlen--;

	if (queue->backlog != &queue->list)
		queue->backlog = queue->backlog->next;

	request = queue->list.next;
	list_del_init(request);

	return list_entry(request, struct crypto_async_request, list);
}
EXPORT_SYMBOL_GPL(crypto_dequeue_request);

/*
 * Add one to the @size-byte buffer at @a, walking from the last byte
 * towards the first and stopping as soon as a byte does not wrap to zero.
 */
static inline void crypto_inc_byte(u8 *a, unsigned int size)
{
	u8 *b = (a + size);
	u8 c;

	for (; size; size--) {
		c = *--b + 1;
		*b = c;
		if (c)
			break;
	}
}

/*
 * crypto_inc() - add one to the @size-byte buffer at @a, treating the last
 * byte as least significant.
 *
 * When unaligned access is efficient, or the end of the buffer is suitably
 * aligned for __be32, the increment proceeds four bytes at a time from the
 * end and returns as soon as a word does not wrap to zero.  Whatever is
 * left at the front of the buffer is handled by crypto_inc_byte().
 */
void crypto_inc(u8 *a, unsigned int size)
{
	__be32 *b = (__be32 *)(a + size);
	u32 c;

	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
	    IS_ALIGNED((unsigned long)b, __alignof__(*b)))
		for (; size >= 4; size -= 4) {
			c = be32_to_cpu(*--b) + 1;
			*b = cpu_to_be32(c);
			if (likely(c))
				return;
		}

	/* size now counts only the bytes not yet incremented */
	crypto_inc_byte(a, size);
}
EXPORT_SYMBOL_GPL(crypto_inc);

/*
 * crypto_alg_extsize() - cra_ctxsize plus the bits of cra_alignmask that lie
 * above crypto_tfm_ctx_alignment() - 1.
 */
unsigned int crypto_alg_extsize(struct crypto_alg *alg)
{
	return alg->cra_ctxsize +
	       (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
}
EXPORT_SYMBOL_GPL(crypto_alg_extsize);

/*
 * crypto_type_has_alg() - return 1 if crypto_find_alg() succeeds for the
 * given name/frontend/type/mask, 0 otherwise.  The reference obtained by a
 * successful lookup is dropped before returning.
 */
int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
			u32 type, u32 mask)
{
	int ret = 0;
	struct crypto_alg *alg = crypto_find_alg(name, frontend, type, mask);

	if (!IS_ERR(alg)) {
		crypto_mod_put(alg);
		ret = 1;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(crypto_type_has_alg);

/*
 * Schedule tests for test larvals on crypto_alg_list that have not been
 * started yet.
 *
 * Only runs when CONFIG_CRYPTO_ALGAPI is built in and
 * CONFIG_CRYPTO_SELFTESTS is enabled.  Each pass takes crypto_alg_sem,
 * claims at most one unstarted test larval by setting test_started, drops
 * the semaphore and only then calls crypto_schedule_test(); the loop ends
 * when a pass finds nothing to claim.
 */
static void __init crypto_start_tests(void)
{
	if (!IS_BUILTIN(CONFIG_CRYPTO_ALGAPI))
		return;

	if (!IS_ENABLED(CONFIG_CRYPTO_SELFTESTS))
		return;

	set_crypto_boot_test_finished();

	for (;;) {
		struct crypto_larval *larval = NULL;
		struct crypto_alg *q;

		down_write(&crypto_alg_sem);

		list_for_each_entry(q, &crypto_alg_list, cra_list) {
			struct crypto_larval *l;

			if (!crypto_is_larval(q))
				continue;

			l = (void *)q;

			if (!crypto_is_test_larval(l))
				continue;

			if (l->test_started)
				continue;

			l->test_started = true;
			larval = l;
			break;
		}

		up_write(&crypto_alg_sem);

		if (!larval)
			break;

		crypto_schedule_test(larval);
	}
}

/* Module init: set up the proc interface, then start the tests. Always returns 0. */
static int __init crypto_algapi_init(void)
{
	crypto_init_proc();
	crypto_start_tests();
	return 0;
}

/* Module exit: tear down the proc interface. */
static void __exit crypto_algapi_exit(void)
{
	crypto_exit_proc();
}

/*
 * We run this at late_initcall so that all the built-in algorithms
 * have had a chance to register themselves first.
 */
late_initcall(crypto_algapi_init);
module_exit(crypto_algapi_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cryptographic algorithms API");
MODULE_SOFTDEP("pre: cryptomgr");
1930 1 2196 185 2282 2277 2292 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_LIST_BL_H #define _LINUX_LIST_BL_H #include <linux/list.h> #include <linux/bit_spinlock.h> /* * Special version of lists, where head of the list has a lock in the lowest * bit. This is useful for scalable hash tables without increasing memory * footprint overhead. * * For modification operations, the 0 bit of hlist_bl_head->first * pointer must be set. * * With some small modifications, this can easily be adapted to store several * arbitrary bits (not just a single lock bit), if the need arises to store * some fast and compact auxiliary data. 
*/ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #define LIST_BL_LOCKMASK 1UL #else #define LIST_BL_LOCKMASK 0UL #endif #ifdef CONFIG_DEBUG_LIST #define LIST_BL_BUG_ON(x) BUG_ON(x) #else #define LIST_BL_BUG_ON(x) #endif struct hlist_bl_head { struct hlist_bl_node *first; }; struct hlist_bl_node { struct hlist_bl_node *next, **pprev; }; #define INIT_HLIST_BL_HEAD(ptr) \ ((ptr)->first = NULL) static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) { h->next = NULL; h->pprev = NULL; } #define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) static inline bool hlist_bl_unhashed(const struct hlist_bl_node *h) { return !h->pprev; } static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h) { return (struct hlist_bl_node *) ((unsigned long)h->first & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_set_first(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != LIST_BL_LOCKMASK); h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); } static inline bool hlist_bl_empty(const struct hlist_bl_head *h) { return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_add_head(struct hlist_bl_node *n, struct hlist_bl_head *h) { struct hlist_bl_node *first = hlist_bl_first(h); n->next = first; if (first) first->pprev = &n->next; n->pprev = &h->first; hlist_bl_set_first(h, n); } static inline void hlist_bl_add_before(struct hlist_bl_node *n, struct hlist_bl_node *next) { struct hlist_bl_node **pprev = next->pprev; n->pprev = pprev; n->next = next; next->pprev = &n->next; /* pprev may be `first`, so be careful not to lose the lock bit */ WRITE_ONCE(*pprev, (struct hlist_bl_node *) ((uintptr_t)n | ((uintptr_t)*pprev & LIST_BL_LOCKMASK))); } static inline void hlist_bl_add_behind(struct hlist_bl_node *n, struct hlist_bl_node *prev) { n->next = prev->next; n->pprev = 
&prev->next; prev->next = n; if (n->next) n->next->pprev = &n->next; } static inline void __hlist_bl_del(struct hlist_bl_node *n) { struct hlist_bl_node *next = n->next; struct hlist_bl_node **pprev = n->pprev; LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev may be `first`, so be careful not to lose the lock bit */ WRITE_ONCE(*pprev, (struct hlist_bl_node *) ((unsigned long)next | ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } static inline void hlist_bl_del(struct hlist_bl_node *n) { __hlist_bl_del(n); n->next = LIST_POISON1; n->pprev = LIST_POISON2; } static inline void hlist_bl_del_init(struct hlist_bl_node *n) { if (!hlist_bl_unhashed(n)) { __hlist_bl_del(n); INIT_HLIST_BL_NODE(n); } } static inline void hlist_bl_lock(struct hlist_bl_head *b) { bit_spin_lock(0, (unsigned long *)b); } static inline void hlist_bl_unlock(struct hlist_bl_head *b) { __bit_spin_unlock(0, (unsigned long *)b); } static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) { return bit_spin_is_locked(0, (unsigned long *)b); } /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * */ #define hlist_bl_for_each_entry(tpos, pos, head, member) \ for (pos = hlist_bl_first(head); \ pos && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) /** * hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. 
*/ #define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \ for (pos = hlist_bl_first(head); \ pos && ({ n = pos->next; 1; }) && \ ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) #endif
3017 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 /* SPDX-License-Identifier: GPL-2.0+ */ /* * RCU-based infrastructure for lightweight reader-writer locking * * Copyright (c) 2015, Red Hat, Inc. * * Author: Oleg Nesterov <oleg@redhat.com> */ #ifndef _LINUX_RCU_SYNC_H_ #define _LINUX_RCU_SYNC_H_ #include <linux/wait.h> #include <linux/rcupdate.h> /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; int gp_count; wait_queue_head_t gp_wait; struct rcu_head cb_head; }; /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization * * Returns true if readers are permitted to use their fastpaths. Must be * invoked within some flavor of RCU read-side critical section. */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), "suspicious rcu_sync_is_idle() usage"); return !READ_ONCE(rsp->gp_state); /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name) { \ .gp_state = 0, \ .gp_count = 0, \ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ } #define DEFINE_RCU_SYNC(name) \ struct rcu_sync name = __RCU_SYNC_INITIALIZER(name) #endif /* _LINUX_RCU_SYNC_H_ */
6 7 6 14 7 9 9 9 7 7 6 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * linux/include/linux/jbd2.h
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>
 *
 * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
 *
 * Definitions for transaction data structures for the buffer cache
 * filesystem journaling support.
 */

#ifndef _LINUX_JBD2_H
#define _LINUX_JBD2_H

/* Allow this file to be included directly into e2fsprogs */
#ifndef __KERNEL__
#include "jfs_compat.h"
#define JBD2_DEBUG
#else

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/journal-head.h>
#include <linux/stddef.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
#include <linux/blkdev.h>
#include <linux/crc32c.h>
#endif

#define journal_oom_retry 1

/*
 * Define JBD2_PARANOID_IOFAIL to cause a kernel BUG() if ext4 finds
 * certain classes of error which can occur due to failed IOs.  Under
 * normal use we want ext4 to continue after such errors, because
 * hardware _can_ fail, but for debugging purposes when running tests on
 * known-good hardware we may want to trap these errors.
 */
#undef JBD2_PARANOID_IOFAIL

/*
 * The default maximum commit age, in seconds.
 */
#define JBD2_DEFAULT_MAX_COMMIT_AGE 5

#ifdef CONFIG_JBD2_DEBUG
/*
 * Define JBD2_EXPENSIVE_CHECKING to enable more expensive internal
 * consistency checks.  By default we don't do this unless
 * CONFIG_JBD2_DEBUG is on.
 */
#define JBD2_EXPENSIVE_CHECKING
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...);

#define jbd2_debug(n, fmt, a...) \
	__jbd2_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a)
#else
/* Without CONFIG_JBD2_DEBUG the level argument is dropped and no_printk() is used. */
#define jbd2_debug(n, fmt, a...)  no_printk(fmt, ##a)
#endif

extern void *jbd2_alloc(size_t size, gfp_t flags);
extern void jbd2_free(void *ptr, size_t size);

#define JBD2_MIN_JOURNAL_BLOCKS 1024
#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256

#ifdef __KERNEL__

/**
 * typedef handle_t - The handle_t type represents a single atomic update being performed by some process.
 *
 * All filesystem modifications made by the process go
 * through this handle.  Recursive operations (such as quota operations)
 * are gathered into a single update.
 *
 * The buffer credits field is used to account for journaled buffers
 * being modified by the running process.  To ensure that there is
 * enough log space for all outstanding operations, we need to limit the
 * number of outstanding buffers possible at any time.  When the
 * operation completes, any buffer credits not used are credited back to
 * the transaction, so that at all times we know how many buffers the
 * outstanding updates on a transaction might possibly touch.
 *
 * This is an opaque datatype.
 **/
typedef struct jbd2_journal_handle handle_t;	/* Atomic operation type */


/**
 * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem.
 *
 * journal_t is linked to from the fs superblock structure.
 *
 * We use the journal_t to keep track of all outstanding transaction
 * activity on the filesystem, and to manage the state of the log
 * writing process.
 *
 * This is an opaque datatype.
 **/
typedef struct journal_s	journal_t;	/* Journal control structure */
#endif

/*
 * Internal structures used by the logging mechanism:
 */

#define JBD2_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */

/*
 * On-disk structures
 */

/*
 * Descriptor block types:
 */

#define JBD2_DESCRIPTOR_BLOCK	1
#define JBD2_COMMIT_BLOCK	2
#define JBD2_SUPERBLOCK_V1	3
#define JBD2_SUPERBLOCK_V2	4
#define JBD2_REVOKE_BLOCK	5

/*
 * Standard header for all descriptor blocks:
 */
typedef struct journal_header_s
{
	__be32		h_magic;
	__be32		h_blocktype;
	__be32		h_sequence;
} journal_header_t;

/*
 * Checksum types.
 */
#define JBD2_CRC32_CHKSUM   1
#define JBD2_MD5_CHKSUM     2
#define JBD2_SHA1_CHKSUM    3
#define JBD2_CRC32C_CHKSUM  4

#define JBD2_CRC32_CHKSUM_SIZE 4

/* Used below as the element count of commit_header.h_chksum[]. */
#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
/*
 * Commit block header for storing transactional checksums:
 *
 * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
 * fields are used to store a checksum of the descriptor and data blocks.
 *
 * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
 * field is used to store crc32c(uuid+commit_block).  Each journal metadata
 * block gets its own checksum, and data block checksums are stored in
 * journal_block_tag (in the descriptor).  The other h_chksum* fields are
 * not used.
 *
 * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
 * journal_block_tag3_t to store a full 32-bit checksum.  Everything else
 * is the same as v2.
 *
 * Checksum v1, v2, and v3 are mutually exclusive features.
 */
struct commit_header {
	__be32		h_magic;
	__be32          h_blocktype;
	__be32          h_sequence;
	unsigned char   h_chksum_type;
	unsigned char   h_chksum_size;
	unsigned char 	h_padding[2];
	__be32 		h_chksum[JBD2_CHECKSUM_BYTES];
	__be64		h_commit_sec;
	__be32		h_commit_nsec;
};

/*
 * The block tag: used to describe a single buffer in the journal.
 * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this
 * raw struct shouldn't be used for pointer math or sizeof() - use
 * journal_tag_bytes(journal) instead to compute this.
 */
typedef struct journal_block_tag3_s
{
	__be32		t_blocknr;	/* The on-disk block number */
	__be32		t_flags;	/* See below */
	__be32		t_blocknr_high; /* most-significant high 32bits. */
	__be32		t_checksum;	/* crc32c(uuid+seq+block) */
} journal_block_tag3_t;

typedef struct journal_block_tag_s
{
	__be32		t_blocknr;	/* The on-disk block number */
	__be16		t_checksum;	/* truncated crc32c(uuid+seq+block) */
	__be16		t_flags;	/* See below */
	__be32		t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t;

/* Tail of descriptor or revoke block, for checksumming */
struct jbd2_journal_block_tail {
	__be32		t_checksum;	/* crc32c(uuid+descr_block) */
};

/*
 * The revoke descriptor: used on disk to describe a series of blocks to
 * be revoked from the log
 */
typedef struct jbd2_journal_revoke_header_s
{
	journal_header_t r_header;
	__be32		 r_count;	/* Count of bytes used in the block */
} jbd2_journal_revoke_header_t;

/* Definitions for the journal tag flags word: */
#define JBD2_FLAG_ESCAPE		1	/* on-disk block is escaped */
#define JBD2_FLAG_SAME_UUID	2	/* block has same uuid as previous */
#define JBD2_FLAG_DELETED	4	/* block deleted by this transaction */
#define JBD2_FLAG_LAST_TAG	8	/* last tag in this descriptor block */


/*
 * The journal superblock.  All fields are in big-endian byte order.
 */
typedef struct journal_superblock_s
{
/* 0x0000 */
	journal_header_t s_header;

/* 0x000C */
	/* Static information describing the journal */
	__be32	s_blocksize;		/* journal device blocksize */
	__be32	s_maxlen;		/* total blocks in journal file */
	__be32	s_first;		/* first block of log information */

/* 0x0018 */
	/* Dynamic information describing the current state of the log */
	__be32	s_sequence;		/* first commit ID expected in log */
	__be32	s_start;		/* blocknr of start of log */

/* 0x0020 */
	/* Error value, as set by jbd2_journal_abort(). */
	__be32	s_errno;

/* 0x0024 */
	/* Remaining fields are only valid in a version-2 superblock */
	__be32	s_feature_compat;	/* compatible feature set */
	__be32	s_feature_incompat;	/* incompatible feature set */
	__be32	s_feature_ro_compat;	/* readonly-compatible feature set */
/* 0x0030 */
	__u8	s_uuid[16];		/* 128-bit uuid for journal */

/* 0x0040 */
	__be32	s_nr_users;		/* Nr of filesystems sharing log */

	__be32	s_dynsuper;		/* Blocknr of dynamic superblock copy*/

/* 0x0048 */
	__be32	s_max_transaction;	/* Limit of journal blocks per trans.*/
	__be32	s_max_trans_data;	/* Limit of data blocks per trans. */

/* 0x0050 */
	__u8	s_checksum_type;	/* checksum type */
	__u8	s_padding2[3];
/* 0x0054 */
	__be32	s_num_fc_blks;		/* Number of fast commit blocks */
	__be32	s_head;			/* blocknr of head of log, only uptodate
					 * while the filesystem is clean */
/* 0x005C */
	__u32	s_padding[40];
	__be32	s_checksum;		/* crc32c(superblock) */

/* 0x0100 */
	__u8	s_users[16*48];		/* ids of all fs'es sharing the log */
/* 0x0400 */
} journal_superblock_t;

#define JBD2_FEATURE_COMPAT_CHECKSUM		0x00000001

#define JBD2_FEATURE_INCOMPAT_REVOKE		0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT		0x00000002
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
#define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
#define JBD2_FEATURE_INCOMPAT_CSUM_V3		0x00000010
#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT	0x00000020

/* See "journal feature predicate functions" below */

/* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES	JBD2_FEATURE_COMPAT_CHECKSUM
#define JBD2_KNOWN_ROCOMPAT_FEATURES	0
#define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
					JBD2_FEATURE_INCOMPAT_64BIT | \
					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
					JBD2_FEATURE_INCOMPAT_CSUM_V2 | \
					JBD2_FEATURE_INCOMPAT_CSUM_V3 | \
					JBD2_FEATURE_INCOMPAT_FAST_COMMIT)

#ifdef __KERNEL__

#include <linux/fs.h>
#include <linux/sched.h>

/* Buffer state bits used by jbd2, numbered from BH_PrivateStart upwards. */
enum jbd_state_bits {
	BH_JBD			/* Has an attached ext3 journal_head */
	  = BH_PrivateStart,
	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
	BH_Freed,		/* Has been freed (truncated) */
	BH_Revoked,		/* Has been revoked from the log */
	BH_RevokeValid,		/* Revoked flag is valid */
	BH_JBDDirty,		/* Is dirty but journaled */
	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
	BH_Shadow,		/* IO on shadow buffer is running */
	BH_Verified,		/* Metadata block has been verified ok */
	BH_JBDPrivateStart,	/* First bit available for private use by FS */
};

BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
BUFFER_FNS(Shadow, shadow)
BUFFER_FNS(Verified, verified)

/* Return the buffer_head recorded in @jh (jh->b_bh). */
static inline struct buffer_head *jh2bh(struct journal_head *jh)
{
	return jh->b_bh;
}

/* Return the journal_head stored in @bh's b_private field. */
static inline struct journal_head *bh2jh(struct buffer_head *bh)
{
	return bh->b_private;
}

/* Take the BH_JournalHead bit spinlock in bh->b_state. */
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
	bit_spin_lock(BH_JournalHead, &bh->b_state);
}

/* Release the BH_JournalHead bit spinlock in bh->b_state. */
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
	bit_spin_unlock(BH_JournalHead, &bh->b_state);
}

/* The _BH and _JH variants ignore their first argument. */
#define J_ASSERT(assert)	BUG_ON(!(assert))

#define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
#define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)

#if defined(JBD2_PARANOID_IOFAIL)
#define J_EXPECT(expr, why...)		J_ASSERT(expr)
#define J_EXPECT_BH(bh, expr, why...)	J_ASSERT_BH(bh, expr)
#define J_EXPECT_JH(jh, expr, why...)	J_ASSERT_JH(jh, expr)
#else
/*
 * Non-fatal expectation: evaluates @expr once, logs two KERN_ERR lines when
 * it is false, and yields its value either way.
 */
#define __journal_expect(expr, why...)					     \
	({								     \
		int val = (expr);					     \
		if (!val) {						     \
			printk(KERN_ERR					     \
			       "JBD2 unexpected failure: %s: %s;\n",	     \
			       __func__, #expr);			     \
			printk(KERN_ERR why "\n");			     \
		}							     \
		val;							     \
	})
#define J_EXPECT(expr, why...)		__journal_expect(expr, ## why)
#define J_EXPECT_BH(bh, expr, why...)	__journal_expect(expr, ## why)
#define J_EXPECT_JH(jh, expr, why...)	__journal_expect(expr, ## why)
#endif

/* Flags in jbd_inode->i_flags */
#define __JI_COMMIT_RUNNING 0
#define __JI_WRITE_DATA 1
#define __JI_WAIT_DATA 2

/*
 * Commit of the inode data in progress. We use this flag to protect us from
 * concurrent deletion of inode. We cannot use reference to inode for this
 * since we cannot afford doing last iput() on behalf of kjournald
 */
#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
/* Write allocated dirty buffers in this inode before commit */
#define JI_WRITE_DATA (1 << __JI_WRITE_DATA)
/* Wait for outstanding data writes for this inode before commit */
#define JI_WAIT_DATA (1 << __JI_WAIT_DATA)

/**
 * struct jbd2_inode - The jbd_inode type is the structure linking inodes in
 * ordered mode present in a transaction so that we can sync them during commit.
 */
struct jbd2_inode {
	/**
	 * @i_transaction:
	 *
	 * Which transaction does this inode belong to? Either the running
	 * transaction or the committing one. [j_list_lock]
	 */
	transaction_t *i_transaction;

	/**
	 * @i_next_transaction:
	 *
	 * Pointer to the running transaction modifying inode's data in case
	 * there is already a committing transaction touching it. [j_list_lock]
	 */
	transaction_t *i_next_transaction;

	/**
	 * @i_list: List of inodes in the i_transaction [j_list_lock]
	 */
	struct list_head i_list;

	/**
	 * @i_vfs_inode:
	 *
	 * VFS inode this inode belongs to [constant for lifetime of structure]
	 */
	struct inode *i_vfs_inode;

	/**
	 * @i_flags: Flags of inode [j_list_lock]
	 */
	unsigned long i_flags;

	/**
	 * @i_dirty_start:
	 *
	 * Offset in bytes where the dirty range for this inode starts.
	 * [j_list_lock]
	 */
	loff_t i_dirty_start;

	/**
	 * @i_dirty_end:
	 *
	 * Inclusive offset in bytes where the dirty range for this inode
	 * ends. [j_list_lock]
	 */
	loff_t i_dirty_end;
};

struct jbd2_revoke_table_s;

/**
 * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete
 *     type associated with handle_t.
 * @h_transaction: Which compound transaction is this update a part of?
 * @h_journal: Which journal handle belongs to - used iff h_reserved set.
 * @h_rsv_handle: Handle reserved for finishing the logical operation.
 * @h_total_credits: Number of remaining buffers we are allowed to add to
 *	journal. These are dirty buffers and revoke descriptor blocks.
 * @h_revoke_credits: Number of remaining revoke records available for handle
 * @h_ref: Reference count on this handle.
 * @h_err: Field for caller's use to track errors through large fs operations.
 * @h_sync: Flag for sync-on-close.
 * @h_reserved: Flag for handle for reserved credits.
 * @h_aborted: Flag indicating fatal error on handle.
 * @h_type: For handle statistics.
 * @h_line_no: For handle statistics.
 * @h_start_jiffies: Handle Start time.
 * @h_requested_credits: Holds @h_total_credits after handle is started.
 * @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started.
 * @saved_alloc_context: Saved context while transaction is open.
 **/

/* Docbook can't yet cope with the bit fields, but will leave the documentation
 * in so it can be fixed later.
 */

struct jbd2_journal_handle
{
	union {
		transaction_t	*h_transaction;
		/* Which journal handle belongs to - used iff h_reserved set */
		journal_t	*h_journal;
	};

	handle_t		*h_rsv_handle;
	int			h_total_credits;
	int			h_revoke_credits;
	int			h_revoke_credits_requested;
	int			h_ref;
	int			h_err;

	/* Flags [no locking] */
	unsigned int	h_sync:		1;
	unsigned int	h_reserved:	1;
	unsigned int	h_aborted:	1;
	unsigned int	h_type:		8;
	unsigned int	h_line_no:	16;

	unsigned long		h_start_jiffies;
	unsigned int		h_requested_credits;

	unsigned int		saved_alloc_context;
};


/*
 * Some stats for checkpoint phase
 */
struct transaction_chp_stats_s {
	unsigned long		cs_chp_time;
	__u32			cs_forced_to_close;
	__u32			cs_written;
	__u32			cs_dropped;
};

/* The transaction_t type is the guts of the journaling mechanism.
It * tracks a compound transaction through its various states: * * RUNNING: accepting new updates * LOCKED: Updates still running but we don't accept new ones * RUNDOWN: Updates are tidying up but have finished requesting * new buffers to modify (state not used for now) * FLUSH: All updates complete, but we are still writing to disk * COMMIT: All data on disk, writing commit record * FINISHED: We still have to keep the transaction for checkpointing. * * The transaction keeps track of all of the buffers modified by a * running transaction, and all of the buffers committed but not yet * flushed to home for finished transactions. * (Locking Documentation improved by LockDoc) */ /* * Lock ranking: * * j_list_lock * ->jbd_lock_bh_journal_head() (This is "innermost") * * j_state_lock * ->b_state_lock * * b_state_lock * ->j_list_lock * * j_state_lock * ->j_list_lock (journal_unmap_buffer) * */ struct transaction_s { /* Pointer to the journal for this transaction. [no locking] */ journal_t *t_journal; /* Sequence number for this transaction [no locking] */ tid_t t_tid; /* * Transaction's current state * [no locking - only kjournald2 alters this] * [j_list_lock] guards transition of a transaction into T_FINISHED * state and subsequent call of __jbd2_journal_drop_transaction() * FIXME: needs barriers * KLUDGE: [use j_state_lock] */ enum { T_RUNNING, T_LOCKED, T_SWITCH, T_FLUSH, T_COMMIT, T_COMMIT_DFLUSH, T_COMMIT_JFLUSH, T_COMMIT_CALLBACK, T_FINISHED } t_state; /* * Where in the log does this transaction's commit start? 
[no locking] */ unsigned long t_log_start; /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ int t_nr_buffers; /* * Doubly-linked circular list of all buffers reserved but not yet * modified by this transaction [j_list_lock, no locks needed for * jbd2 thread] */ struct journal_head *t_reserved_list; /* * Doubly-linked circular list of all metadata buffers owned by this * transaction [j_list_lock, no locks needed for jbd2 thread] */ struct journal_head *t_buffers; /* * Doubly-linked circular list of all forget buffers (superseded * buffers which we can un-checkpoint once this transaction commits) * [j_list_lock] */ struct journal_head *t_forget; /* * Doubly-linked circular list of all buffers still to be flushed before * this transaction can be checkpointed. [j_list_lock] */ struct journal_head *t_checkpoint_list; /* * Doubly-linked circular list of metadata buffers being * shadowed by log IO. The IO buffers on the iobuf list and * the shadow buffers on this list match each other one for * one at all times. [j_list_lock, no locks needed for jbd2 * thread] */ struct journal_head *t_shadow_list; /* * List of inodes associated with the transaction; e.g., ext4 uses * this to track inodes in data=ordered and data=journal mode that * need special handling on transaction commit; also used by ocfs2. * [j_list_lock] */ struct list_head t_inode_list; /* * Longest time some handle had to wait for running transaction */ unsigned long t_max_wait; /* * When transaction started */ unsigned long t_start; /* * When commit was requested [j_state_lock] */ unsigned long t_requested; /* * Checkpointing stats [j_list_lock] */ struct transaction_chp_stats_s t_chp_stats; /* * Number of outstanding updates running on this transaction * [none] */ atomic_t t_updates; /* * Number of blocks reserved for this transaction in the journal. 
* This is including all credits reserved when starting transaction * handles as well as all journal descriptor blocks needed for this * transaction. [none] */ atomic_t t_outstanding_credits; /* * Number of revoke records for this transaction added by already * stopped handles. [none] */ atomic_t t_outstanding_revokes; /* * How many handles used this transaction? [none] */ atomic_t t_handle_count; /* * Forward and backward links for the circular list of all transactions * awaiting checkpoint. [j_list_lock] */ transaction_t *t_cpnext, *t_cpprev; /* * When will the transaction expire (become due for commit), in jiffies? * [no locking] */ unsigned long t_expires; /* * When this transaction started, in nanoseconds [no locking] */ ktime_t t_start_time; /* * This transaction is being forced and some process is * waiting for it to finish. */ unsigned int t_synchronous_commit:1; /* Disk flush needs to be sent to fs partition [no locking] */ int t_need_data_flush; }; struct transaction_run_stats_s { unsigned long rs_wait; unsigned long rs_request_delay; unsigned long rs_running; unsigned long rs_locked; unsigned long rs_flushing; unsigned long rs_logging; __u32 rs_handle_count; __u32 rs_blocks; __u32 rs_blocks_logged; }; struct transaction_stats_s { unsigned long ts_tid; unsigned long ts_requested; struct transaction_run_stats_s run; }; static inline unsigned long jbd2_time_diff(unsigned long start, unsigned long end) { if (end >= start) return end - start; return end + (MAX_JIFFY_OFFSET - start); } #define JBD2_NR_BATCH 64 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; #define JBD2_FC_REPLAY_STOP 0 #define JBD2_FC_REPLAY_CONTINUE 1 /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. */ struct journal_s { /** * @j_flags: General journaling state flags [j_state_lock, * no lock for quick racy checks] */ unsigned long j_flags; /** * @j_errno: * * Is there an outstanding uncleared error on the journal (from a prior * abort)? 
[j_state_lock] */ int j_errno; /** * @j_abort_mutex: Lock the whole aborting procedure. */ struct mutex j_abort_mutex; /** * @j_sb_buffer: The first part of the superblock buffer. */ struct buffer_head *j_sb_buffer; /** * @j_superblock: The second part of the superblock buffer. */ journal_superblock_t *j_superblock; /** * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; /** * @j_barrier_count: * * Number of processes waiting to create a barrier lock [j_state_lock, * no lock for quick racy checks] */ int j_barrier_count; /** * @j_barrier: The barrier lock itself. */ struct mutex j_barrier; /** * @j_running_transaction: * * Transactions: The current running transaction... * [j_state_lock, no lock for quick racy checks] [caller holding * open handle] */ transaction_t *j_running_transaction; /** * @j_committing_transaction: * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; /** * @j_checkpoint_transactions: * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; /** * @j_wait_transaction_locked: * * Wait queue for waiting for a locked transaction to start committing, * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; /** * @j_wait_done_commit: Wait queue for waiting for commit to complete. */ wait_queue_head_t j_wait_done_commit; /** * @j_wait_commit: Wait queue to trigger commit. */ wait_queue_head_t j_wait_commit; /** * @j_wait_updates: Wait queue to wait for updates to complete. */ wait_queue_head_t j_wait_updates; /** * @j_wait_reserved: * * Wait queue to wait for reserved buffer credits to drop. */ wait_queue_head_t j_wait_reserved; /** * @j_fc_wait: * * Wait queue to wait for completion of async fast commits. */ wait_queue_head_t j_fc_wait; /** * @j_checkpoint_mutex: * * Semaphore for locking against concurrent checkpoints. 
*/ struct mutex j_checkpoint_mutex; /** * @j_chkpt_bhs: * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; /** * @j_shrinker: * * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: * * Number of journal buffers on the checkpoint list. [j_list_lock] */ struct percpu_counter j_checkpoint_jh_count; /** * @j_shrink_transaction: * * Record next transaction will shrink on the checkpoint list. * [j_list_lock] */ transaction_t *j_shrink_transaction; /** * @j_head: * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; /** * @j_tail: * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; /** * @j_free: * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; /** * @j_first: * * The block number of the first usable block in the journal * [j_state_lock]. */ unsigned long j_first; /** * @j_last: * * The block number one beyond the last usable block in the journal * [j_state_lock]. */ unsigned long j_last; /** * @j_fc_first: * * The block number of the first fast commit block in the journal * [j_state_lock]. */ unsigned long j_fc_first; /** * @j_fc_off: * * Number of fast commit blocks currently allocated. Accessed only * during fast commit. Currently only one process can do fast commit, so * this field is not protected by any lock. */ unsigned long j_fc_off; /** * @j_fc_last: * * The block number one beyond the last fast commit block in the journal * [j_state_lock]. */ unsigned long j_fc_last; /** * @j_dev: Device where we store the journal. 
*/ struct block_device *j_dev; /** * @j_blocksize: Block size for the location where we store the journal. */ int j_blocksize; /** * @j_blk_offset: * * Starting block offset into the device where we store the journal. */ unsigned long long j_blk_offset; /** * @j_devname: Journal device name. */ char j_devname[BDEVNAME_SIZE+24]; /** * @j_fs_dev: * * Device which holds the client fs. For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; /** * @j_fs_dev_wb_err: * * Records the errseq of the client fs's backing block device. */ errseq_t j_fs_dev_wb_err; /** * @j_total_len: Total maximum capacity of the journal region on disk. */ unsigned int j_total_len; /** * @j_reserved_credits: * * Number of buffers reserved from the running transaction. */ atomic_t j_reserved_credits; /** * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; /** * @j_inode: * * Optional inode where we store the journal. If present, all * journal block numbers are mapped into this inode via bmap(). */ struct inode *j_inode; /** * @j_tail_sequence: * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; /** * @j_transaction_sequence: * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; /** * @j_commit_sequence: * * Sequence number of the most recently committed transaction * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_sequence; /** * @j_commit_request: * * Sequence number of the most recent transaction wanting commit * [j_state_lock, no lock for quick racy checks] */ tid_t j_commit_request; /** * @j_uuid: * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single * journal and to perform atomic updates across them. 
*/ __u8 j_uuid[16]; /** * @j_task: Pointer to the current commit thread for this journal. */ struct task_struct *j_task; /** * @j_max_transaction_buffers: * * Maximum number of metadata buffers to allow in a single compound * commit transaction. */ int j_max_transaction_buffers; /** * @j_revoke_records_per_block: * * Number of revoke records that fit in one descriptor block. */ int j_revoke_records_per_block; /** * @j_transaction_overhead_buffers: * * Number of blocks each transaction needs for its own bookkeeping */ int j_transaction_overhead_buffers; /** * @j_commit_interval: * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; /** * @j_commit_timer: The timer used to wake up the commit thread. */ struct timer_list j_commit_timer; /** * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; /** * @j_revoke: * * The revoke table - maintains the list of revoked blocks in the * current transaction. */ struct jbd2_revoke_table_s *j_revoke; /** * @j_revoke_table: Alternate revoke tables for j_revoke. */ struct jbd2_revoke_table_s *j_revoke_table[2]; /** * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; /** * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only * during a fast commit. Currently only one process can do fast commit, so * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; /** * @j_wbufsize: * * Size of @j_wbuf array. */ int j_wbufsize; /** * @j_fc_wbufsize: * * Size of @j_fc_wbuf array. */ int j_fc_wbufsize; /** * @j_last_sync_writer: * * The pid of the last person to run a synchronous operation * through the journal. */ pid_t j_last_sync_writer; /** * @j_average_commit_time: * * The average amount of time in nanoseconds it takes to commit a * transaction to disk. 
[j_state_lock] */ u64 j_average_commit_time; /** * @j_min_batch_time: * * Minimum time that we should wait for additional filesystem operations * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; /** * @j_max_batch_time: * * Maximum time that we should wait for additional filesystem operations * to get batched into a synchronous handle in microseconds. */ u32 j_max_batch_time; /** * @j_commit_callback: * * This function is called when a transaction is closed. */ void (*j_commit_callback)(journal_t *, transaction_t *); /** * @j_submit_inode_data_buffers: * * This function is called for all inodes associated with the * committing transaction marked with JI_WRITE_DATA flag * before we start to write out the transaction to the journal. */ int (*j_submit_inode_data_buffers) (struct jbd2_inode *); /** * @j_finish_inode_data_buffers: * * This function is called for all inodes associated with the * committing transaction marked with JI_WAIT_DATA flag * after we have written the transaction to the journal * but before we write out the commit block. */ int (*j_finish_inode_data_buffers) (struct jbd2_inode *); /* * Journal statistics */ /** * @j_history_lock: Protect the transactions statistics history. */ spinlock_t j_history_lock; /** * @j_proc_entry: procfs entry for the jbd statistics directory. */ struct proc_dir_entry *j_proc_entry; /** * @j_stats: Overall statistics. */ struct transaction_stats_s j_stats; /** * @j_failed_commit: Failed journal commit ID. */ unsigned int j_failed_commit; /** * @j_private: * * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here. */ void *j_private; /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * @j_trans_commit_map: * * Lockdep entity to track transaction commit dependencies. Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. 
This matches the properties of jbd2 journalling * where the running transaction has to wait for all handles to be * dropped to commit that transaction and also acquiring a handle may * require transaction commit to finish. */ struct lockdep_map j_trans_commit_map; #endif /** * @j_fc_cleanup_callback: * * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: * * File-system specific function that performs replay of a fast * commit. JBD2 calls this function for each fast commit block found in * the journal. This function should return JBD2_FC_REPLAY_CONTINUE * to indicate that the block was processed correctly and more fast * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP * indicates the end of replay (no more blocks remaining). A negative * return value indicates error. */ int (*j_fc_replay_callback)(struct journal_s *journal, struct buffer_head *bh, enum passtype pass, int off, tid_t expected_commit_id); /** * @j_bmap: * * Bmap function that should be used instead of the generic * VFS bmap function. */ int (*j_bmap)(struct journal_s *journal, sector_t *block); }; #define jbd2_might_wait_for_commit(j) \ do { \ rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \ rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \ } while (0) /* * We can support any known requested features iff the * superblock is not in version 1. Otherwise we fail to support any * extended sb features. 
*/ static inline bool jbd2_format_support_feature(journal_t *j) { return j->j_superblock->s_header.h_blocktype != cpu_to_be32(JBD2_SUPERBLOCK_V1); } /* journal feature predicate functions */ #define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_compat & \ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_compat |= \ cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_compat &= \ ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ } #define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_ro_compat & \ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_ro_compat |= \ cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_ro_compat &= \ ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ } #define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline bool jbd2_has_feature_##name(journal_t *j) \ { \ return (jbd2_format_support_feature(j) && \ ((j)->j_superblock->s_feature_incompat & \ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \ } \ static inline void jbd2_set_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_incompat |= \ cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ } \ static inline void jbd2_clear_feature_##name(journal_t *j) \ { \ (j)->j_superblock->s_feature_incompat &= \ ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ } JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM) 
JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE) JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) /* Journal high priority write IO operation flags */ #define JBD2_JOURNAL_REQ_FLAGS (REQ_META | REQ_SYNC | REQ_IDLE) /* * Journal flag definitions */ #define JBD2_UNMOUNT 0x001 /* Journal thread is being destroyed */ #define JBD2_ABORT 0x002 /* Journaling has been aborted for errors. */ #define JBD2_ACK_ERR 0x004 /* The errno in the sb has been acked */ #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ #define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on * clean and empty filesystem * logging area */ #define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ #define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ #define JBD2_JOURNAL_FLUSH_DISCARD 0x0001 #define JBD2_JOURNAL_FLUSH_ZEROOUT 0x0002 #define JBD2_JOURNAL_FLUSH_VALID (JBD2_JOURNAL_FLUSH_DISCARD | \ JBD2_JOURNAL_FLUSH_ZEROOUT) /* * Function declarations for the journaling transaction and buffer * management */ /* Filing buffers */ extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); } static inline void jbd2_unfile_log_bh(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); } /* Log buffer allocation */ struct buffer_head 
*jbd2_journal_get_descriptor_buffer(transaction_t *, int); void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ enum jbd2_shrink_type {JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP, JBD2_SHRINK_BUSY_SKIP}; void __jbd2_journal_clean_checkpoint_list(journal_t *journal, enum jbd2_shrink_type type); unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan); int __jbd2_journal_remove_checkpoint(struct journal_head *); int jbd2_journal_try_remove_checkpoint(struct journal_head *jh); void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); /* * Triggers */ struct jbd2_buffer_trigger_type { /* * Fired at the moment data to write to the journal are known to be * stable - so either at the moment b_frozen_data is created or just * before a buffer is written to the journal. mapped_data is a mapped * buffer that is the frozen data for commit. */ void (*t_frozen)(struct jbd2_buffer_trigger_type *type, struct buffer_head *bh, void *mapped_data, size_t size); /* * Fired during journal abort for dirty buffers that will not be * committed. 
*/
	void (*t_abort)(struct jbd2_buffer_trigger_type *type,
			struct buffer_head *bh);
};

extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
				       void *mapped_data,
				       struct jbd2_buffer_trigger_type *triggers);
extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
				      struct jbd2_buffer_trigger_type *triggers);

/* Buffer IO */
extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
					      struct journal_head *jh_in,
					      struct buffer_head **bh_out,
					      sector_t blocknr);

/* Transaction cache support */
extern void jbd2_journal_destroy_transaction_cache(void);
extern int __init jbd2_journal_init_transaction_cache(void);
extern void jbd2_journal_free_transaction(transaction_t *);

/*
 * Journal locking.
 *
 * We need to lock the journal during transaction state changes so that nobody
 * ever tries to take a handle on the running transaction while we are in the
 * middle of moving it to the commit phase.  j_state_lock does this.
 *
 * Note that the locking is completely interrupt unsafe.  We never touch
 * journal structures from interrupts.
 */

/* Return the handle stored in the current task's journal_info field. */
static inline handle_t *journal_current_handle(void)
{
	return current->journal_info;
}

/* The journaling code user interface:
 *
 * Create and destroy handles
 * Register buffer modifications against the current transaction.
 */

extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
				     int revoke_records, gfp_t gfp_mask,
				     unsigned int type, unsigned int line_no);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks,
				 int revoke_records, gfp_t gfp_mask);
extern int jbd2_journal_start_reserved(handle_t *handle,
				       unsigned int type, unsigned int line_no);
extern void jbd2_journal_free_reserved(handle_t *handle);
extern int jbd2_journal_extend(handle_t *handle, int nblocks,
			       int revoke_records);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
void jbd2_journal_set_triggers(struct buffer_head *,
			       struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
int jbd2_journal_invalidate_folio(journal_t *, struct folio *,
				  size_t offset, size_t length);
bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio);
extern int jbd2_journal_stop(handle_t *);
extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
extern void jbd2_journal_lock_updates (journal_t *);
extern void jbd2_journal_unlock_updates (journal_t *);

void jbd2_journal_wait_updates(journal_t *);

extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
					 struct block_device *fs_dev,
					 unsigned long long start, int len, int bsize);
extern journal_t * jbd2_journal_init_inode (struct inode *);
extern int jbd2_journal_update_format (journal_t *);
extern int jbd2_journal_check_used_features
		(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_check_available_features
		(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_set_features
		(journal_t *, unsigned long, unsigned long, unsigned long);
extern void jbd2_journal_clear_features
		(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_load (journal_t *journal);
extern int jbd2_journal_destroy (journal_t *);
extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_sb_errno(journal_t *);
extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
					    unsigned long, blk_opf_t);
extern void jbd2_journal_abort (journal_t *, int);
extern int jbd2_journal_errno (journal_t *);
extern void jbd2_journal_ack_err (journal_t *);
extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_force_commit_nested(journal_t *);
extern int jbd2_journal_inode_ranged_write(handle_t *handle,
					   struct jbd2_inode *inode,
					   loff_t start_byte, loff_t length);
extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
					  struct jbd2_inode *inode,
					  loff_t start_byte, loff_t length);
extern int jbd2_journal_finish_inode_data_buffers(
			struct jbd2_inode *jinode);
extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
					       struct jbd2_inode *inode,
					       loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
					struct inode *inode);
extern void jbd2_journal_release_jbd_inode(journal_t *journal,
					   struct jbd2_inode *jinode);

/*
 * journal_head management
 */
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh);
struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh);
void jbd2_journal_put_journal_head(struct journal_head *jh);

/*
 * handle management
 */
extern struct kmem_cache *jbd2_handle_cache;

/*
 * This specialized allocator has to be a macro for its allocations to be
 * accounted separately (to have a separate alloc_tag). The typecast is
 * intentional to enforce typesafety.
 */
#define jbd2_alloc_handle(_gfp_flags)	\
		((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags))

/* Return a handle to jbd2_handle_cache. */
static inline void jbd2_free_handle(handle_t *handle)
{
	kmem_cache_free(jbd2_handle_cache, handle);
}

/*
 * jbd2_inode management (optional, for those file systems that want to use
 * dynamically allocated jbd2_inode structures)
 */
extern struct kmem_cache *jbd2_inode_cache;

/*
 * This specialized allocator has to be a macro for its allocations to be
 * accounted separately (to have a separate alloc_tag). The typecast is
 * intentional to enforce typesafety.
 */
#define jbd2_alloc_inode(_gfp_flags)	\
		((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags))

/* Return a jbd2_inode to jbd2_inode_cache. */
static inline void jbd2_free_inode(struct jbd2_inode *jinode)
{
	kmem_cache_free(jbd2_inode_cache, jinode);
}

/* Primary revoke support */
#define JOURNAL_REVOKE_DEFAULT_HASH 256
extern int jbd2_journal_init_revoke(journal_t *, int);
extern void jbd2_journal_destroy_revoke_record_cache(void);
extern void jbd2_journal_destroy_revoke_table_cache(void);
extern int __init jbd2_journal_init_revoke_record_cache(void);
extern int __init jbd2_journal_init_revoke_table_cache(void);
struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size);
void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table);

extern void jbd2_journal_destroy_revoke(journal_t *);
extern int jbd2_journal_revoke (handle_t *, unsigned long long,
				struct buffer_head *);
extern void jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
					      struct list_head *log_bufs);

/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal);
extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);

/*
 * The log thread user interface:
 *
 * Request space in the current transaction, and force transaction commit
 * transitions on demand.
 */

int jbd2_log_start_commit(journal_t *journal, tid_t tid);
int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
int jbd2_transaction_committed(journal_t *journal, tid_t tid);
int jbd2_complete_transaction(journal_t *journal, tid_t tid);
int jbd2_log_do_checkpoint(journal_t *journal);
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);

void __jbd2_log_wait_for_space(journal_t *journal);
extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
extern int jbd2_cleanup_journal_tail(journal_t *);

/* Fast commit related APIs */
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
int jbd2_fc_end_commit(journal_t *journal);
int jbd2_fc_end_commit_fallback(journal_t *journal);
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
void jbd2_fc_release_bufs(journal_t *journal);

/*
 * is_journal_aborted
 *
 * Simple test wrapper function to test the JBD2_ABORT state flag.  This
 * bit, when set, indicates that we have had a fatal error somewhere,
 * either inside the journaling layer or indicated to us by the client
 * (eg. ext3), and that we should not commit any further
 * transactions.
 */

static inline int is_journal_aborted(journal_t *journal)
{
	return journal->j_flags & JBD2_ABORT;
}

/*
 * Return nonzero when the handle itself is marked aborted, when it has no
 * transaction attached, or when the journal of its transaction is aborted.
 */
static inline int is_handle_aborted(handle_t *handle)
{
	if (handle->h_aborted || !handle->h_transaction)
		return 1;
	return is_journal_aborted(handle->h_transaction->t_journal);
}

/* Mark the handle as aborted; only the handle's own flag is touched. */
static inline void jbd2_journal_abort_handle(handle_t *handle)
{
	handle->h_aborted = 1;
}

/* Record the current wb_err of the fs device mapping in the journal. */
static inline void jbd2_init_fs_dev_write_error(journal_t *journal)
{
	struct address_space *mapping = journal->j_fs_dev->bd_mapping;

	/*
	 * Save the original wb_err value of client fs's bdev mapping which
	 * could be used to detect the client fs's metadata async write error.
	 */
	errseq_check_and_advance(&mapping->wb_err, &journal->j_fs_dev_wb_err);
}

/*
 * Compare the fs device mapping's wb_err against the value saved in
 * journal->j_fs_dev_wb_err and return the result of errseq_check().
 */
static inline int jbd2_check_fs_dev_write_error(journal_t *journal)
{
	struct address_space *mapping = journal->j_fs_dev->bd_mapping;

	return errseq_check(&mapping->wb_err,
			    READ_ONCE(journal->j_fs_dev_wb_err));
}

#endif /* __KERNEL__   */

/* Comparison functions for transaction IDs: perform comparisons using
 * modulo arithmetic so that they work over sequence number wraps. */

static inline int tid_gt(tid_t x, tid_t y)
{
	/* The difference is stored in a signed int so its sign decides */
	int difference = (x - y);
	return (difference > 0);
}

static inline int tid_geq(tid_t x, tid_t y)
{
	/* Same scheme as tid_gt(), but equal IDs also compare true */
	int difference = (x - y);
	return (difference >= 0);
}

extern int jbd2_journal_blocks_per_folio(struct inode *inode);
extern size_t journal_tag_bytes(journal_t *journal);

/* True when either the csum2 or the csum3 journal feature is set. */
static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
{
	return jbd2_has_feature_csum2(journal) ||
	       jbd2_has_feature_csum3(journal);
}

/*
 * Return the fast-commit block count from the superblock, falling back to
 * JBD2_DEFAULT_FAST_COMMIT_BLOCKS when the stored value is zero.
 */
static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
{
	int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks);

	return num_fc_blocks ? num_fc_blocks : JBD2_DEFAULT_FAST_COMMIT_BLOCKS;
}

/*
 * Return number of free blocks in the log. Must be called under j_state_lock.
 */
static inline unsigned long jbd2_log_space_left(journal_t *journal)
{
	/* Allow for rounding errors */
	long free = journal->j_free - 32;

	/* Credits still held by the committing transaction are not free */
	if (journal->j_committing_transaction) {
		free -= atomic_read(&journal->
                        j_committing_transaction->t_outstanding_credits);
	}
	/* Clamp at zero so a negative value is never returned as unsigned */
	return max_t(long, free, 0);
}

/*
 * Definitions which augment the buffer_head layer
 */

/* journaling buffer types */
#define BJ_None		0	/* Not journaled */
#define BJ_Metadata	1	/* Normal journaled metadata */
#define BJ_Forget	2	/* Buffer superseded by this transaction */
#define BJ_Shadow	3	/* Buffer contents being shadowed to the log */
#define BJ_Reserved	4	/* Buffer is reserved for access by journal */
#define BJ_Types	5

/* Thin wrapper: checksum @length bytes at @address with crc32c(). */
static inline u32 jbd2_chksum(u32 crc, const void *address, unsigned int length)
{
	return crc32c(crc, address, length);
}

/* Return most recent uncommitted transaction */
static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
{
	tid_t tid;

	read_lock(&journal->j_state_lock);
	/* Default to the last commit request, prefer the running transaction */
	tid = journal->j_commit_request;
	if (journal->j_running_transaction)
		tid = journal->j_running_transaction->t_tid;
	read_unlock(&journal->j_state_lock);
	return tid;
}

/*
 * Return the handle's total credits minus the blocks needed for the
 * requested revoke credits (rounded up to whole revoke blocks).
 */
static inline int jbd2_handle_buffer_credits(handle_t *handle)
{
	journal_t *journal;

	/* A reserved handle carries the journal directly in h_journal */
	if (!handle->h_reserved)
		journal = handle->h_transaction->t_journal;
	else
		journal = handle->h_journal;

	return handle->h_total_credits -
		DIV_ROUND_UP(handle->h_revoke_credits_requested,
			     journal->j_revoke_records_per_block);
}

#ifdef __KERNEL__

/* Buffer tracing hooks: all of them expand to empty statements here */
#define buffer_trace_init(bh)	do {} while (0)
#define print_buffer_fields(bh)	do {} while (0)
#define print_buffer_trace(bh)	do {} while (0)
#define BUFFER_TRACE(bh, info)	do {} while (0)
#define BUFFER_TRACE2(bh, bh2, info)	do {} while (0)
#define JBUFFER_TRACE(jh, info)	do {} while (0)

#endif	/* __KERNEL__ */

#define EFSBADCRC	EBADMSG		/* Bad CRC detected */
#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */

#endif	/* _LINUX_JBD2_H */
2 1 1 12 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 // SPDX-License-Identifier: GPL-2.0-only /* * IEEE 802.1Q GARP VLAN Registration Protocol (GVRP) * * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/if_vlan.h> #include <net/garp.h> #include "vlan.h" #define GARP_GVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } enum gvrp_attributes { GVRP_ATTR_INVALID, GVRP_ATTR_VID, __GVRP_ATTR_MAX }; #define GVRP_ATTR_MAX (__GVRP_ATTR_MAX - 1) static struct garp_application vlan_gvrp_app __read_mostly = { .proto.group_address = GARP_GVRP_ADDRESS, .maxattr = GVRP_ATTR_MAX, .type = GARP_APPLICATION_GVRP, }; int vlan_gvrp_request_join(const struct net_device *dev) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); if (vlan->vlan_proto != htons(ETH_P_8021Q)) return 0; return garp_request_join(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } void vlan_gvrp_request_leave(const struct net_device *dev) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); if (vlan->vlan_proto != htons(ETH_P_8021Q)) return; garp_request_leave(vlan->real_dev, &vlan_gvrp_app, &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID); } int vlan_gvrp_init_applicant(struct net_device *dev) { return garp_init_applicant(dev, &vlan_gvrp_app); } void vlan_gvrp_uninit_applicant(struct net_device *dev) { garp_uninit_applicant(dev, &vlan_gvrp_app); } int __init vlan_gvrp_init(void) { return garp_register_application(&vlan_gvrp_app); } void vlan_gvrp_uninit(void) { garp_unregister_application(&vlan_gvrp_app); }
16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 // SPDX-License-Identifier: GPL-2.0 /* * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem. * * Changes: * YOSHIFUJI Hideaki @USAGI: added icmp sysctl table. 
*/ #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/slab.h> #include <linux/export.h> #include <net/ndisc.h> #include <net/ipv6.h> #include <net/addrconf.h> #include <net/inet_frag.h> #include <net/netevent.h> #include <net/ip_fib.h> #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif #include <linux/ioam6.h> static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static u32 ioam6_id_max = IOAM6_DEFAULT_ID; static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int ret; net = container_of(table->data, struct net, ipv6.sysctl.multipath_hash_policy); ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); return ret; } static int proc_rt6_multipath_hash_fields(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int ret; net = container_of(table->data, struct net, ipv6.sysctl.multipath_hash_fields); ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); return ret; } static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "anycast_src_echo_reply", .data = &init_net.ipv6.sysctl.anycast_src_echo_reply, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_consistency", .data = &init_net.ipv6.sysctl.flowlabel_consistency, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = 
"auto_flowlabels", .data = &init_net.ipv6.sysctl.auto_flowlabels, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra2 = &auto_flowlabels_max }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "idgen_retries", .data = &init_net.ipv6.sysctl.idgen_retries, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "idgen_delay", .data = &init_net.ipv6.sysctl.idgen_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "flowlabel_state_ranges", .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_nonlocal_bind", .data = &init_net.ipv6.sysctl.ip_nonlocal_bind, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_reflect", .data = &init_net.ipv6.sysctl.flowlabel_reflect, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &flowlabel_reflect_max, }, { .procname = "max_dst_opts_number", .data = &init_net.ipv6.sysctl.max_dst_opts_cnt, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "max_hbh_opts_number", .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "max_dst_opts_length", .data = &init_net.ipv6.sysctl.max_dst_opts_len, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "max_hbh_length", .data = &init_net.ipv6.sysctl.max_hbh_opts_len, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "fib_multipath_hash_policy", .data = &init_net.ipv6.sysctl.multipath_hash_policy, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, .extra2 = 
SYSCTL_THREE, }, { .procname = "fib_multipath_hash_fields", .data = &init_net.ipv6.sysctl.multipath_hash_fields, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_rt6_multipath_hash_fields, .extra1 = SYSCTL_ONE, .extra2 = &rt6_multipath_hash_fields_all_mask, }, { .procname = "seg6_flowlabel", .data = &init_net.ipv6.sysctl.seg6_flowlabel, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "fib_notify_on_flag_change", .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "ioam6_id", .data = &init_net.ipv6.sysctl.ioam6_id, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra2 = &ioam6_id_max, }, { .procname = "ioam6_id_wide", .data = &init_net.ipv6.sysctl.ioam6_id_wide, .maxlen = sizeof(u64), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra2 = &ioam6_id_wide_max, }, }; static struct ctl_table ipv6_rotable[] = { { .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "mld_qrv", .data = &sysctl_mld_qrv, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE }, #ifdef CONFIG_NETLABEL { .procname = "calipso_cache_enable", .data = &calipso_cache_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "calipso_cache_bucket_size", .data = &calipso_cache_bucketsize, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ }; static int __net_init ipv6_sysctl_net_init(struct net *net) { size_t table_size = ARRAY_SIZE(ipv6_table_template); struct ctl_table *ipv6_table; struct ctl_table *ipv6_route_table; struct ctl_table *ipv6_icmp_table; int err, i; err = -ENOMEM; ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template), GFP_KERNEL); if 
(!ipv6_table) goto out; /* Update the variables to point into the current struct net */ for (i = 0; i < table_size; i++) ipv6_table[i].data += (void *)net - (void *)&init_net; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; ipv6_icmp_table = ipv6_icmp_sysctl_init(net); if (!ipv6_icmp_table) goto out_ipv6_route_table; net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6", ipv6_table, table_size); if (!net->ipv6.sysctl.hdr) goto out_ipv6_icmp_table; net->ipv6.sysctl.route_hdr = register_net_sysctl_sz(net, "net/ipv6/route", ipv6_route_table, ipv6_route_sysctl_table_size(net)); if (!net->ipv6.sysctl.route_hdr) goto out_unregister_ipv6_table; net->ipv6.sysctl.icmp_hdr = register_net_sysctl_sz(net, "net/ipv6/icmp", ipv6_icmp_table, ipv6_icmp_sysctl_table_size()); if (!net->ipv6.sysctl.icmp_hdr) goto out_unregister_route_table; err = 0; out: return err; out_unregister_route_table: unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr); out_unregister_ipv6_table: unregister_net_sysctl_table(net->ipv6.sysctl.hdr); out_ipv6_icmp_table: kfree(ipv6_icmp_table); out_ipv6_route_table: kfree(ipv6_route_table); out_ipv6_table: kfree(ipv6_table); goto out; } static void __net_exit ipv6_sysctl_net_exit(struct net *net) { const struct ctl_table *ipv6_table; const struct ctl_table *ipv6_route_table; const struct ctl_table *ipv6_icmp_table; ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg; ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg; ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr); unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr); unregister_net_sysctl_table(net->ipv6.sysctl.hdr); kfree(ipv6_table); kfree(ipv6_route_table); kfree(ipv6_icmp_table); } static struct pernet_operations ipv6_sysctl_net_ops = { .init = ipv6_sysctl_net_init, .exit = ipv6_sysctl_net_exit, }; static struct ctl_table_header *ip6_header; int ipv6_sysctl_register(void) { 
int err = -ENOMEM; ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); if (!ip6_header) goto out; err = register_pernet_subsys(&ipv6_sysctl_net_ops); if (err) goto err_pernet; out: return err; err_pernet: unregister_net_sysctl_table(ip6_header); goto out; } void ipv6_sysctl_unregister(void) { unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); }
16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 // SPDX-License-Identifier: GPL-2.0-only /* * Generic netlink handshake service * * Author: Chuck Lever <chuck.lever@oracle.com> * * Copyright (c) 2023, Oracle and/or its affiliates. */ #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/mm.h> #include <net/sock.h> #include <net/genetlink.h> #include <net/netns/generic.h> #include <kunit/visibility.h> #include <uapi/linux/handshake.h> #include "handshake.h" #include "genl.h" #include <trace/events/handshake.h> /** * handshake_genl_notify - Notify handlers that a request is waiting * @net: target network namespace * @proto: handshake protocol * @flags: memory allocation control flags * * Returns zero on success or a negative errno if notification failed. 
*/ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto, gfp_t flags) { struct sk_buff *msg; void *hdr; /* Disable notifications during unit testing */ if (!test_bit(HANDSHAKE_F_PROTO_NOTIFY, &proto->hp_flags)) return 0; if (!genl_has_listeners(&handshake_nl_family, net, proto->hp_handler_class)) return -ESRCH; msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0, HANDSHAKE_CMD_READY); if (!hdr) goto out_free; if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, proto->hp_handler_class) < 0) { genlmsg_cancel(msg, hdr); goto out_free; } genlmsg_end(msg, hdr); return genlmsg_multicast_netns(&handshake_nl_family, net, msg, 0, proto->hp_handler_class, flags); out_free: nlmsg_free(msg); return -EMSGSIZE; } /** * handshake_genl_put - Create a generic netlink message header * @msg: buffer in which to create the header * @info: generic netlink message context * * Returns a ready-to-use header, or NULL. 
*/ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, struct genl_info *info) { return genlmsg_put(msg, info->snd_portid, info->snd_seq, &handshake_nl_family, 0, info->genlhdr->cmd); } EXPORT_SYMBOL(handshake_genl_put); int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; struct socket *sock; int class, fd, err; err = -EOPNOTSUPP; if (!hn) goto out_status; err = -EINVAL; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS)) goto out_status; class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]); err = -EAGAIN; req = handshake_req_next(hn, class); if (!req) goto out_status; sock = req->hr_sk->sk_socket; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_complete; } err = req->hr_proto->hp_accept(req, info, fd); if (err) { put_unused_fd(fd); goto out_complete; } fd_install(fd, get_file(sock->file)); trace_handshake_cmd_accept(net, req, req->hr_sk, fd); return 0; out_complete: handshake_complete(req, -EIO, NULL); out_status: trace_handshake_cmd_accept_err(net, req, NULL, err); return err; } int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_req *req; struct socket *sock; int fd, status, err; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) return -EINVAL; fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); sock = sockfd_lookup(fd, &err); if (!sock) return err; req = handshake_req_hash_lookup(sock->sk); if (!req) { err = -EBUSY; trace_handshake_cmd_done_err(net, req, sock->sk, err); sockfd_put(sock); return err; } trace_handshake_cmd_done(net, req, sock->sk, fd); status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); sockfd_put(sock); return 0; } static unsigned int handshake_net_id; static int 
__net_init handshake_net_init(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); unsigned long tmp; struct sysinfo si; /* * Arbitrary limit to prevent handshakes that do not make * progress from clogging up the system. The cap scales up * with the amount of physical memory on the system. */ si_meminfo(&si); tmp = si.totalram / (25 * si.mem_unit); hn->hn_pending_max = clamp(tmp, 3UL, 50UL); spin_lock_init(&hn->hn_lock); hn->hn_pending = 0; hn->hn_flags = 0; INIT_LIST_HEAD(&hn->hn_requests); return 0; } static void __net_exit handshake_net_exit(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); struct handshake_req *req; LIST_HEAD(requests); /* * Drain the net's pending list. Requests that have been * accepted and are in progress will be destroyed when * the socket is closed. */ spin_lock(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); list_splice_init(&requests, &hn->hn_requests); spin_unlock(&hn->hn_lock); while (!list_empty(&requests)) { req = list_first_entry(&requests, struct handshake_req, hr_list); list_del(&req->hr_list); /* * Requests on this list have not yet been * accepted, so they do not have an fd to put. */ handshake_complete(req, -ETIMEDOUT, NULL); } } static struct pernet_operations handshake_genl_net_ops = { .init = handshake_net_init, .exit = handshake_net_exit, .id = &handshake_net_id, .size = sizeof(struct handshake_net), }; /** * handshake_pernet - Get the handshake private per-net structure * @net: network namespace * * Returns a pointer to the net's private per-net structure for the * handshake module, or NULL if handshake_init() failed. */ struct handshake_net *handshake_pernet(struct net *net) { return handshake_net_id ? 
net_generic(net, handshake_net_id) : NULL; } EXPORT_SYMBOL_IF_KUNIT(handshake_pernet); static int __init handshake_init(void) { int ret; ret = handshake_req_hash_init(); if (ret) { pr_warn("handshake: hash initialization failed (%d)\n", ret); return ret; } ret = genl_register_family(&handshake_nl_family); if (ret) { pr_warn("handshake: netlink registration failed (%d)\n", ret); handshake_req_hash_destroy(); return ret; } /* * ORDER: register_pernet_subsys must be done last. * * If initialization does not make it past pernet_subsys * registration, then handshake_net_id will remain 0. That * shunts the handshake consumer API to return ENOTSUPP * to prevent it from dereferencing something that hasn't * been allocated. */ ret = register_pernet_subsys(&handshake_genl_net_ops); if (ret) { pr_warn("handshake: pernet registration failed (%d)\n", ret); genl_unregister_family(&handshake_nl_family); handshake_req_hash_destroy(); } return ret; } static void __exit handshake_exit(void) { unregister_pernet_subsys(&handshake_genl_net_ops); handshake_net_id = 0; handshake_req_hash_destroy(); genl_unregister_family(&handshake_nl_family); } module_init(handshake_init); module_exit(handshake_exit);
34 1 1 1 1 11 10 10 1 11 2 2 2 2 2 2 2 10 11 11 11 11 10 528 530 2 123 116 11 528 88 10 83 123 532 528 10 118 122 1 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 // SPDX-License-Identifier: GPL-2.0 /* * Ldisc rw semaphore * * The ldisc semaphore is semantically a rw_semaphore but which enforces * an alternate policy, namely: * 1) Supports lock wait timeouts * 2) Write waiter has priority * 3) Downgrading is not supported * * Implementation notes: * 1) Upper half of semaphore count is a wait 
count (differs from rwsem
 *    in that rwsem normalizes the upper half to the wait bias)
 * 2) Lacks overflow checking
 *
 * The generic counting was copied and modified from include/asm-generic/rwsem.h
 * by Paul Mackerras <paulus@samba.org>.
 *
 * The scheduling policy was copied and modified from lib/rwsem.c
 * Written by David Howells (dhowells@redhat.com).
 *
 * This implementation incorporates the write lock stealing work of
 * Michel Lespinasse <walken@google.com>.
 *
 * Copyright (C) 2013 Peter Hurley <peter@hurleysoftware.com>
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/tty.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>

/*
 * Layout of sem->count: the low half (selected by LDSEM_ACTIVE_MASK) is the
 * "active" part, the remaining upper half is the "wait" part.
 */
#if BITS_PER_LONG == 64
# define LDSEM_ACTIVE_MASK	0xffffffffL
#else
# define LDSEM_ACTIVE_MASK	0x0000ffffL
#endif

#define LDSEM_UNLOCKED		0L
/* one unit in the active part */
#define LDSEM_ACTIVE_BIAS	1L
/* one unit in the wait part; negative, so a count with waiters is < 0 */
#define LDSEM_WAIT_BIAS		(-LDSEM_ACTIVE_MASK-1)
/* a reader contributes only an active unit */
#define LDSEM_READ_BIAS		LDSEM_ACTIVE_BIAS
/* a writer contributes an active unit plus a wait unit */
#define LDSEM_WRITE_BIAS	(LDSEM_WAIT_BIAS + LDSEM_ACTIVE_BIAS)

/*
 * Wait-list entry; instances live on the stack of the sleeping task
 * (see down_read_failed() and down_write_failed()).
 */
struct ldsem_waiter {
	struct list_head list;		/* link in sem->read_wait or sem->write_wait */
	struct task_struct *task;	/* sleeping task; readers see NULL once granted */
};

/*
 * Initialize an ldsem:
 *
 * Resets the count to LDSEM_UNLOCKED, clears the waiting-reader counter and
 * initializes the wait lock and both wait lists.  @name and @key are only
 * used for the lockdep map when CONFIG_DEBUG_LOCK_ALLOC is set.
 */
void __init_ldsem(struct ld_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	atomic_long_set(&sem->count, LDSEM_UNLOCKED);
	sem->wait_readers = 0;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->read_wait);
	INIT_LIST_HEAD(&sem->write_wait);
}

/*
 * Try to hand the read lock to every queued reader at once.
 *
 * All callers in this file hold sem->wait_lock.  If the count does not end
 * up positive after the adjustment, the adjustment is undone and nobody is
 * woken.
 */
static void __ldsem_wake_readers(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter, *next;
	struct task_struct *tsk;
	long adjust, count;

	/*
	 * Try to grant read locks to all readers on the read wait list.
	 * Note the 'active part' of the count is incremented by
	 * the number of readers before waking any processes up.
	 */
	/* per waiting reader: add an active unit and remove its wait unit */
	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
	count = atomic_long_add_return(adjust, &sem->count);
	do {
		if (count > 0)
			break;
		/* grant not possible: back the adjustment out and give up */
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
			return;
	} while (1);

	list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
		tsk = waiter->task;
		/* pairs with smp_load_acquire() in down_read_failed() */
		smp_store_release(&waiter->task, NULL);
		wake_up_process(tsk);
		/* drops the reference taken by down_read_failed() */
		put_task_struct(tsk);
	}
	/* the whole list was consumed; reset it rather than unlinking entries */
	INIT_LIST_HEAD(&sem->read_wait);
	sem->wait_readers = 0;
}

/*
 * Attempt to take the write lock on behalf of a queued writer.
 * Returns 1 if the lock was taken, 0 if the attempt was backed out.
 */
static inline int writer_trylock(struct ld_semaphore *sem)
{
	/*
	 * Only wake this writer if the active part of the count can be
	 * transitioned from 0 -> 1
	 */
	long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
	do {
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
			return 1;
		/* someone else is active: remove the active unit added above */
		if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
			return 0;
	} while (1);
}

/*
 * Wake the first writer on the write wait list.  The writer takes the lock
 * itself via writer_trylock() and unlinks itself in down_write_failed().
 */
static void __ldsem_wake_writer(struct ld_semaphore *sem)
{
	struct ldsem_waiter *waiter;

	waiter = list_entry(sem->write_wait.next, struct ldsem_waiter, list);
	wake_up_process(waiter->task);
}

/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 *
 * Queued writers take priority: readers are only woken when the write wait
 * list is empty.
 */
static void __ldsem_wake(struct ld_semaphore *sem)
{
	if (!list_empty(&sem->write_wait))
		__ldsem_wake_writer(sem);
	else if (!list_empty(&sem->read_wait))
		__ldsem_wake_readers(sem);
}

/* Locked wrapper: runs __ldsem_wake() under sem->wait_lock with IRQs saved. */
static void ldsem_wake(struct ld_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	__ldsem_wake(sem);
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}

/*
 * wait for the read lock to be granted
 *
 * @count is the value returned by the failed fast-path add; @timeout is
 * passed to schedule_timeout().  Returns @sem once the lock is held, or
 * NULL if the wait timed out.
 */
static struct ld_semaphore __sched *
down_read_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	/* swap the active unit added by the fast path for a wait unit */
	long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if there are no waiters,
	 * and early-out if not
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
			count += adjust;
			break;
		}
		if (count > 0) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->read_wait);
	sem->wait_readers++;

	waiter.task = current;
	/* reference is dropped by the waker, or below on timeout */
	get_task_struct(current);

	/* if there are no active locks, wake the new lock owner(s) */
	if ((count & LDSEM_ACTIVE_MASK) == 0)
		__ldsem_wake(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		/* NULL means __ldsem_wake_readers() granted us the lock */
		if (!smp_load_acquire(&waiter.task))
			break;
		if (!timeout)
			break;
		timeout = schedule_timeout(timeout);
	}

	__set_current_state(TASK_RUNNING);

	if (!timeout) {
		/*
		 * Lock timed out but check if this task was just
		 * granted lock ownership - if so, pretend there
		 * was no timeout; otherwise, cleanup lock wait.
		 */
		raw_spin_lock_irq(&sem->wait_lock);
		if (waiter.task) {
			/* still queued: remove our wait unit and unlink */
			atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
			sem->wait_readers--;
			list_del(&waiter.list);
			raw_spin_unlock_irq(&sem->wait_lock);
			put_task_struct(waiter.task);
			return NULL;
		}
		raw_spin_unlock_irq(&sem->wait_lock);
	}

	return sem;
}

/*
 * wait for the write lock to be granted
 *
 * @count is the value returned by the failed fast-path add; @timeout is
 * passed to schedule_timeout().  Returns @sem once the lock is held, or
 * NULL if the wait timed out.
 */
static struct ld_semaphore __sched *
down_write_failed(struct ld_semaphore *sem, long count, long timeout)
{
	struct ldsem_waiter waiter;
	/* drop only the active unit; the wait unit from LDSEM_WRITE_BIAS stays */
	long adjust = -LDSEM_ACTIVE_BIAS;
	int locked = 0;

	/* set up my own style of waitqueue */
	raw_spin_lock_irq(&sem->wait_lock);

	/*
	 * Try to reverse the lock attempt but if the count has changed
	 * so that reversing fails, check if the lock is now owned,
	 * and early-out if so.
	 */
	do {
		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
			break;
		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
			raw_spin_unlock_irq(&sem->wait_lock);
			return sem;
		}
	} while (1);

	list_add_tail(&waiter.list, &sem->write_wait);

	waiter.task = current;

	set_current_state(TASK_UNINTERRUPTIBLE);
	for (;;) {
		if (!timeout)
			break;
		/* sleep without the wait lock, retake it before retrying */
		raw_spin_unlock_irq(&sem->wait_lock);
		timeout = schedule_timeout(timeout);
		raw_spin_lock_irq(&sem->wait_lock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		locked = writer_trylock(sem);
		if (locked)
			break;
	}

	/* timed out: remove the wait unit this writer left in the count */
	if (!locked)
		atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
	list_del(&waiter.list);

	/*
	 * In case of timeout, wake up every reader who gave the right of way
	 * to the writer. This prevents readers from being split into two
	 * groups: one that holds the semaphore and another that sleeps.
	 * (in case of no contention with a writer)
	 */
	if (!locked && list_empty(&sem->write_wait))
		__ldsem_wake_readers(sem);

	raw_spin_unlock_irq(&sem->wait_lock);

	__set_current_state(TASK_RUNNING);

	/* lock wait may have timed out */
	if (!locked)
		return NULL;
	return sem;
}

/*
 * Common read-lock path: fast path is a single atomic add of
 * LDSEM_READ_BIAS; a non-positive result falls back to down_read_failed().
 * Returns 1 if the lock was acquired, 0 if the wait timed out.
 */
static int __ldsem_down_read_nested(struct ld_semaphore *sem,
					   int subclass, long timeout)
{
	long count;

	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
	if (count <= 0) {
		lock_contended(&sem->dep_map, _RET_IP_);
		if (!down_read_failed(sem, count, timeout)) {
			rwsem_release(&sem->dep_map, _RET_IP_);
			return 0;
		}
	}
	lock_acquired(&sem->dep_map, _RET_IP_);
	return 1;
}

/*
 * Common write-lock path: fast path is a single atomic add of
 * LDSEM_WRITE_BIAS; unless the active part is exactly one (only us), it
 * falls back to down_write_failed().
 * Returns 1 if the lock was acquired, 0 if the wait timed out.
 */
static int __ldsem_down_write_nested(struct ld_semaphore *sem,
					    int subclass, long timeout)
{
	long count;

	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);

	count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
		lock_contended(&sem->dep_map, _RET_IP_);
		if (!down_write_failed(sem, count, timeout)) {
			rwsem_release(&sem->dep_map, _RET_IP_);
			return 0;
		}
	}
	lock_acquired(&sem->dep_map, _RET_IP_);
	return 1;
}


/*
 *
lock for reading -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, 0, timeout); } /* * trylock for reading -- returns 1 if successful, 0 if contention */ int ldsem_down_read_trylock(struct ld_semaphore *sem) { long count = atomic_long_read(&sem->count); while (count >= 0) { if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); lock_acquired(&sem->dep_map, _RET_IP_); return 1; } } return 0; } /* * lock for writing -- returns 1 if successful, 0 if timed out */ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, 0, timeout); } /* * release a read lock */ void ldsem_up_read(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count); if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0) ldsem_wake(sem); } /* * release a write lock */ void ldsem_up_write(struct ld_semaphore *sem) { long count; rwsem_release(&sem->dep_map, _RET_IP_); count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count); if (count < 0) ldsem_wake(sem); } #ifdef CONFIG_DEBUG_LOCK_ALLOC int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_read_nested(sem, subclass, timeout); } int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, long timeout) { might_sleep(); return __ldsem_down_write_nested(sem, subclass, timeout); } #endif
7 17 4 14 16 16 1 13 4 4 3 903 894 16 3 2 6 6 2 7 6 3 1 8 1 1 4 3 7 7 7 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 // SPDX-License-Identifier: GPL-2.0-or-later /* * Directory notifications for Linux. 
*
 * Copyright (C) 2000,2001,2002 Stephen Rothwell
 *
 * Copyright (C) 2009 Eric Paris <Red Hat Inc>
 * dnotify was largly rewritten to use the new fsnotify infrastructure
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/fsnotify_backend.h>

/* Global on/off switch, enabled by default; exposed through the sysctl table below. */
static int dir_notify_enable __read_mostly = 1;

#ifdef CONFIG_SYSCTL
/* Single integer knob "dir-notify-enable" backed by dir_notify_enable. */
static const struct ctl_table dnotify_sysctls[] = {
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};
/* Register the table above under the "fs" sysctl path. */
static void __init dnotify_sysctl_init(void)
{
	register_sysctl_init("fs", dnotify_sysctls);
}
#else
/* No sysctl support configured: registration is a no-op. */
#define dnotify_sysctl_init() do { } while (0)
#endif

/* Slab cache for struct dnotify_struct objects. */
static struct kmem_cache *dnotify_struct_cache __ro_after_init;
/* Slab cache for struct dnotify_mark objects. */
static struct kmem_cache *dnotify_mark_cache __ro_after_init;
/* The one fsnotify group that all dnotify marks belong to. */
static struct fsnotify_group *dnotify_group __ro_after_init;

/*
 * dnotify will attach one of these to each inode (i_fsnotify_marks) which
 * is being watched by dnotify.  If multiple userspace applications are watching
 * the same directory with dnotify their information is chained in dn
 */
struct dnotify_mark {
	struct fsnotify_mark fsn_mark;	/* embedded mark; recovered via container_of() */
	struct dnotify_struct *dn;	/* head of the singly linked watcher list */
};

/*
 * When a process starts or stops watching an inode the set of events which
 * dnotify cares about for that inode may change.  This function runs the
 * list of everything receiving dnotify events about this directory and calculates
 * the set of all those events.  After it updates what dnotify is interested in
 * it calls the fsnotify function so it can update the set of all events relevant
 * to this inode.
*/ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); assert_spin_locked(&fsn_mark->lock); for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); if (fsn_mark->mask == new_mask) return; fsn_mark->mask = new_mask; fsnotify_recalc_mask(fsn_mark->connector); } /* * Mains fsnotify call where events are delivered to dnotify. * Find the dnotify mark on the relevant inode, run the list of dnotify structs * on that mark and determine which of them has expressed interest in receiving * events of this type. When found send the correct process and signal and * destroy the dnotify struct if it was not registered to receive multiple * events. */ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct fown_struct *fown; __u32 test_mask = mask & ~FS_EVENT_ON_CHILD; /* not a dir, dnotify doesn't care */ if (!dir && !(mask & FS_ISDIR)) return 0; dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); spin_lock(&inode_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_mask & test_mask) == 0) { prev = &dn->dn_next; continue; } fown = file_f_owner(dn->dn_filp); send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; else { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(inode_mark); } } spin_unlock(&inode_mark->lock); return 0; } static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) { struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); BUG_ON(dn_mark->dn); kmem_cache_free(dnotify_mark_cache, dn_mark); } static const struct fsnotify_ops 
dnotify_fsnotify_ops = { .handle_inode_event = dnotify_handle_event, .free_mark = dnotify_free_mark, }; /* * Called every time a file is closed. Looks first for a dnotify mark on the * inode. If one is found run all of the ->dn structures attached to that * mark for one relevant to this process closing the file and remove that * dnotify_struct. If that was the last dnotify_struct also remove the * fsnotify_mark. */ void dnotify_flush(struct file *filp, fl_owner_t id) { struct fsnotify_mark *fsn_mark; struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; struct inode *inode; bool free = false; inode = file_inode(filp); if (!S_ISDIR(inode->i_mode)) return; fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); fsnotify_group_lock(dnotify_group); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; while ((dn = *prev) != NULL) { if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { *prev = dn->dn_next; kmem_cache_free(dnotify_struct_cache, dn); dnotify_recalc_inode_mask(fsn_mark); break; } prev = &dn->dn_next; } spin_unlock(&fsn_mark->lock); /* nothing else could have found us thanks to the dnotify_groups mark_mutex */ if (dn_mark->dn == NULL) { fsnotify_detach_mark(fsn_mark); free = true; } fsnotify_group_unlock(dnotify_group); if (free) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); } /* this conversion is done only at watch creation */ static __u32 convert_arg(unsigned int arg) { __u32 new_mask = FS_EVENT_ON_CHILD; if (arg & DN_MULTISHOT) new_mask |= FS_DN_MULTISHOT; if (arg & DN_DELETE) new_mask |= (FS_DELETE | FS_MOVED_FROM); if (arg & DN_MODIFY) new_mask |= FS_MODIFY; if (arg & DN_ACCESS) new_mask |= FS_ACCESS; if (arg & DN_ATTRIB) new_mask |= FS_ATTRIB; if (arg & DN_RENAME) new_mask |= FS_RENAME; if (arg & DN_CREATE) new_mask |= (FS_CREATE | FS_MOVED_TO); return new_mask; } /* * If multiple processes watch the same inode with 
dnotify there is only one
 * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
 * onto that mark.  This function either attaches the new dnotify_struct onto
 * that list, or it |= the mask onto an existing dnofiy_struct.
 */
/*
 * Returns 0 when @dn was linked at the head of @dn_mark's list, or -EEXIST
 * when an entry with the same (@id, @filp) already existed; in that case the
 * existing entry gets @fd and has @mask OR-ed in, and @dn is left untouched.
 */
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
	struct dnotify_struct *odn;

	odn = dn_mark->dn;
	while (odn != NULL) {
		/* adding more events to existing dnotify_struct? */
		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
			odn->dn_fd = fd;
			odn->dn_mask |= mask;
			return -EEXIST;
		}
		odn = odn->dn_next;
	}

	dn->dn_mask = mask;
	dn->dn_fd = fd;
	dn->dn_filp = filp;
	dn->dn_owner = id;
	dn->dn_next = dn_mark->dn;
	dn_mark->dn = dn;

	return 0;
}

/*
 * When a process calls fcntl to attach a dnotify watch to a directory it ends
 * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
 * attached to the fsnotify_mark.
 *
 * Returns 0 on success (including the explicit-removal and lost-race cases),
 * -EINVAL when dnotify is disabled, -ENOTDIR when @filp is not a directory,
 * -ENOMEM on allocation failure, or the error reported by
 * security_path_notify(), file_f_owner_allocate() or
 * fsnotify_add_inode_mark_locked().
 */
int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
	struct dnotify_mark *new_dn_mark, *dn_mark;
	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
	struct dnotify_struct *dn;
	struct inode *inode;
	fl_owner_t id = current->files;
	struct file *f = NULL;
	int destroy = 0, error = 0;
	__u32 mask;

	/* we use these to tell if we need to kfree */
	new_fsn_mark = NULL;
	dn = NULL;

	if (!dir_notify_enable) {
		error = -EINVAL;
		goto out_err;
	}

	/* a 0 mask means we are explicitly removing the watch */
	if ((arg & ~DN_MULTISHOT) == 0) {
		dnotify_flush(filp, id);
		error = 0;
		goto out_err;
	}

	/* dnotify only works on directories */
	inode = file_inode(filp);
	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
		goto out_err;
	}

	/*
	 * convert the userspace DN_* "arg" to the internal FS_*
	 * defined in fsnotify
	 */
	mask = convert_arg(arg);

	error = security_path_notify(&filp->f_path, mask,
			FSNOTIFY_OBJ_TYPE_INODE);
	if (error)
		goto out_err;

	/* expect most fcntl to add new rather than augment old */
	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
	if (!dn) {
		error = -ENOMEM;
		goto out_err;
	}

	error = file_f_owner_allocate(filp);
	if (error)
		goto out_err;

	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
	if (!new_dn_mark) {
		error = -ENOMEM;
		goto out_err;
	}

	/* set up the new_fsn_mark and new_dn_mark */
	new_fsn_mark = &new_dn_mark->fsn_mark;
	fsnotify_init_mark(new_fsn_mark, dnotify_group);
	new_fsn_mark->mask = mask;
	new_dn_mark->dn = NULL;

	/* this is needed to prevent the fcntl/close race described below */
	fsnotify_group_lock(dnotify_group);

	/* add the new_fsn_mark or find an old one. */
	fsn_mark = fsnotify_find_inode_mark(inode, dnotify_group);
	if (fsn_mark) {
		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
		spin_lock(&fsn_mark->lock);
	} else {
		error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
		if (error) {
			fsnotify_group_unlock(dnotify_group);
			goto out_err;
		}
		spin_lock(&new_fsn_mark->lock);
		fsn_mark = new_fsn_mark;
		dn_mark = new_dn_mark;
		/* we used new_fsn_mark, so don't free it */
		new_fsn_mark = NULL;
	}

	f = fget_raw(fd);

	/* if (f != filp) means that we lost a race and another task/thread
	 * actually closed the fd we are still playing with before we grabbed
	 * the dnotify_groups mark_mutex and fsn_mark->lock.  Since closing the
	 * fd is the only time we clean up the marks we need to get our mark
	 * off the list. */
	if (f != filp) {
		/* if we added ourselves, shoot ourselves, it's possible that
		 * the flush actually did shoot this fsn_mark.  That's fine too
		 * since multiple calls to destroy_mark is perfectly safe, if
		 * we found a dn_mark already attached to the inode, just sod
		 * off silently as the flush at close time dealt with it.
		 */
		if (dn_mark == new_dn_mark)
			destroy = 1;
		error = 0;
		goto out;
	}

	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);

	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
	/* !error means that we attached the dn to the dn_mark, so don't free it */
	if (!error)
		dn = NULL;
	/* -EEXIST means that we didn't add this new dn and used an old one.
	 * that isn't an error (and the unused dn should be freed) */
	else if (error == -EEXIST)
		error = 0;

	dnotify_recalc_inode_mask(fsn_mark);
out:
	spin_unlock(&fsn_mark->lock);

	if (destroy)
		fsnotify_detach_mark(fsn_mark);
	fsnotify_group_unlock(dnotify_group);
	if (destroy)
		fsnotify_free_mark(fsn_mark);
	fsnotify_put_mark(fsn_mark);
out_err:
	/* release whatever was allocated here but never handed off */
	if (new_fsn_mark)
		fsnotify_put_mark(new_fsn_mark);
	if (dn)
		kmem_cache_free(dnotify_struct_cache, dn);
	if (f)
		fput(f);
	return error;
}

/*
 * Boot-time setup: create both slab caches, allocate the dnotify fsnotify
 * group (panicking if that fails) and register the sysctl knob.
 */
static int __init dnotify_init(void)
{
	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
					  SLAB_PANIC|SLAB_ACCOUNT);
	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);

	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0);
	if (IS_ERR(dnotify_group))
		panic("unable to allocate fsnotify group for dnotify\n");
	dnotify_sysctl_init();
	return 0;
}

module_init(dnotify_init)
1 1 5 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 // SPDX-License-Identifier: GPL-2.0 /* * (C) Copyright 2002-2004, 2007 Greg Kroah-Hartman <greg@kroah.com> * (C) Copyright 2007 Novell Inc. 
*/

#include <linux/pci.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/mempolicy.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/isolation.h>
#include <linux/cpu.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/kexec.h>
#include <linux/of_device.h>
#include <linux/acpi.h>
#include <linux/dma-map-ops.h>
#include <linux/iommu.h>
#include "pci.h"
#include "pcie/portdrv.h"

/* One dynamically added device ID, linked on a driver's dynids.list. */
struct pci_dynid {
	struct list_head node;		/* link in pci_driver->dynids.list */
	struct pci_device_id id;	/* the ID that is matched against devices */
};

/**
 * pci_add_dynid - add a new PCI device ID to this driver and re-probe devices
 * @drv: target pci driver
 * @vendor: PCI vendor ID
 * @device: PCI device ID
 * @subvendor: PCI subvendor ID
 * @subdevice: PCI subdevice ID
 * @class: PCI class
 * @class_mask: PCI class mask
 * @driver_data: private driver data
 *
 * Adds a new dynamic pci device ID to this driver and causes the
 * driver to probe for all devices again.  @drv must have been
 * registered prior to calling this function.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
*/ int pci_add_dynid(struct pci_driver *drv, unsigned int vendor, unsigned int device, unsigned int subvendor, unsigned int subdevice, unsigned int class, unsigned int class_mask, unsigned long driver_data) { struct pci_dynid *dynid; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if (!dynid) return -ENOMEM; dynid->id.vendor = vendor; dynid->id.device = device; dynid->id.subvendor = subvendor; dynid->id.subdevice = subdevice; dynid->id.class = class; dynid->id.class_mask = class_mask; dynid->id.driver_data = driver_data; spin_lock(&drv->dynids.lock); list_add_tail(&dynid->node, &drv->dynids.list); spin_unlock(&drv->dynids.lock); return driver_attach(&drv->driver); } EXPORT_SYMBOL_GPL(pci_add_dynid); static void pci_free_dynids(struct pci_driver *drv) { struct pci_dynid *dynid, *n; spin_lock(&drv->dynids.lock); list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } spin_unlock(&drv->dynids.lock); } /** * pci_match_id - See if a PCI device matches a given pci_id table * @ids: array of PCI device ID structures to search in * @dev: the PCI device structure to match against. * * Used by a driver to check whether a PCI device is in its list of * supported devices. Returns the matching pci_device_id structure or * %NULL if there is no match. * * Deprecated; don't use this as it will not catch any dynamic IDs * that a driver might want to check for. 
*/ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev) { if (ids) { while (ids->vendor || ids->subvendor || ids->class_mask) { if (pci_match_one_device(ids, dev)) return ids; ids++; } } return NULL; } EXPORT_SYMBOL(pci_match_id); static const struct pci_device_id pci_device_id_any = { .vendor = PCI_ANY_ID, .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }; /** * pci_match_device - See if a device matches a driver's list of IDs * @drv: the PCI driver to match against * @dev: the PCI device structure to match against * * Used by a driver to check whether a PCI device is in its list of * supported devices or in the dynids list, which may have been augmented * via the sysfs "new_id" file. Returns the matching pci_device_id * structure or %NULL if there is no match. */ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev) { struct pci_dynid *dynid; const struct pci_device_id *found_id = NULL, *ids; /* When driver_override is set, only bind to the matching driver */ if (dev->driver_override && strcmp(dev->driver_override, drv->name)) return NULL; /* Look at the dynamic ids first, before the static ones */ spin_lock(&drv->dynids.lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (pci_match_one_device(&dynid->id, dev)) { found_id = &dynid->id; break; } } spin_unlock(&drv->dynids.lock); if (found_id) return found_id; for (ids = drv->id_table; (found_id = pci_match_id(ids, dev)); ids = found_id + 1) { /* * The match table is split based on driver_override. * In case override_only was set, enforce driver_override * matching. 
*/ if (found_id->override_only) { if (dev->driver_override) return found_id; } else { return found_id; } } /* driver_override will always match, send a dummy id */ if (dev->driver_override) return &pci_device_id_any; return NULL; } /** * new_id_store - sysfs frontend to pci_add_dynid() * @driver: target device driver * @buf: buffer for scanning device ID data * @count: input size * * Allow PCI IDs to be added to an existing driver via sysfs. */ static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct pci_driver *pdrv = to_pci_driver(driver); const struct pci_device_id *ids = pdrv->id_table; u32 vendor, device, subvendor = PCI_ANY_ID, subdevice = PCI_ANY_ID, class = 0, class_mask = 0; unsigned long driver_data = 0; int fields; int retval = 0; fields = sscanf(buf, "%x %x %x %x %x %x %lx", &vendor, &device, &subvendor, &subdevice, &class, &class_mask, &driver_data); if (fields < 2) return -EINVAL; if (fields != 7) { struct pci_dev *pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); if (!pdev) return -ENOMEM; pdev->vendor = vendor; pdev->device = device; pdev->subsystem_vendor = subvendor; pdev->subsystem_device = subdevice; pdev->class = class; if (pci_match_device(pdrv, pdev)) retval = -EEXIST; kfree(pdev); if (retval) return retval; } /* Only accept driver_data values that match an existing id_table entry */ if (ids) { retval = -EINVAL; while (ids->vendor || ids->subvendor || ids->class_mask) { if (driver_data == ids->driver_data) { retval = 0; break; } ids++; } if (retval) /* No match */ return retval; } retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice, class, class_mask, driver_data); if (retval) return retval; return count; } static DRIVER_ATTR_WO(new_id); /** * remove_id_store - remove a PCI device ID from this driver * @driver: target device driver * @buf: buffer for scanning device ID data * @count: input size * * Removes a dynamic pci device ID to this driver. 
*/ static ssize_t remove_id_store(struct device_driver *driver, const char *buf, size_t count) { struct pci_dynid *dynid, *n; struct pci_driver *pdrv = to_pci_driver(driver); u32 vendor, device, subvendor = PCI_ANY_ID, subdevice = PCI_ANY_ID, class = 0, class_mask = 0; int fields; size_t retval = -ENODEV; fields = sscanf(buf, "%x %x %x %x %x %x", &vendor, &device, &subvendor, &subdevice, &class, &class_mask); if (fields < 2) return -EINVAL; spin_lock(&pdrv->dynids.lock); list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) { struct pci_device_id *id = &dynid->id; if ((id->vendor == vendor) && (id->device == device) && (subvendor == PCI_ANY_ID || id->subvendor == subvendor) && (subdevice == PCI_ANY_ID || id->subdevice == subdevice) && !((id->class ^ class) & class_mask)) { list_del(&dynid->node); kfree(dynid); retval = count; break; } } spin_unlock(&pdrv->dynids.lock); return retval; } static DRIVER_ATTR_WO(remove_id); static struct attribute *pci_drv_attrs[] = { &driver_attr_new_id.attr, &driver_attr_remove_id.attr, NULL, }; ATTRIBUTE_GROUPS(pci_drv); struct drv_dev_and_id { struct pci_driver *drv; struct pci_dev *dev; const struct pci_device_id *id; }; static long local_pci_probe(void *_ddi) { struct drv_dev_and_id *ddi = _ddi; struct pci_dev *pci_dev = ddi->dev; struct pci_driver *pci_drv = ddi->drv; struct device *dev = &pci_dev->dev; int rc; /* * Unbound PCI devices are always put in D0, regardless of * runtime PM status. During probe, the device is set to * active and the usage count is incremented. If the driver * supports runtime PM, it should call pm_runtime_put_noidle(), * or any other runtime PM helper function decrementing the usage * count, in its probe routine and pm_runtime_get_noresume() in * its remove routine. 
*/ pm_runtime_get_sync(dev); pci_dev->driver = pci_drv; rc = pci_drv->probe(pci_dev, ddi->id); if (!rc) return rc; if (rc < 0) { pci_dev->driver = NULL; pm_runtime_put_sync(dev); return rc; } /* * Probe function should return < 0 for failure, 0 for success * Treat values > 0 as success, but warn. */ pci_warn(pci_dev, "Driver probe function unexpectedly returned %d\n", rc); return 0; } static bool pci_physfn_is_probed(struct pci_dev *dev) { #ifdef CONFIG_PCI_IOV return dev->is_virtfn && dev->physfn->is_probed; #else return false; #endif } static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, const struct pci_device_id *id) { int error, node, cpu; struct drv_dev_and_id ddi = { drv, dev, id }; /* * Execute driver initialization on node where the device is * attached. This way the driver likely allocates its local memory * on the right node. */ node = dev_to_node(&dev->dev); dev->is_probed = 1; cpu_hotplug_disable(); /* * Prevent nesting work_on_cpu() for the case where a Virtual Function * device is probed from work_on_cpu() of the Physical device. */ if (node < 0 || node >= MAX_NUMNODES || !node_online(node) || pci_physfn_is_probed(dev)) { cpu = nr_cpu_ids; } else { cpumask_var_t wq_domain_mask; if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) { error = -ENOMEM; goto out; } cpumask_and(wq_domain_mask, housekeeping_cpumask(HK_TYPE_WQ), housekeeping_cpumask(HK_TYPE_DOMAIN)); cpu = cpumask_any_and(cpumask_of_node(node), wq_domain_mask); free_cpumask_var(wq_domain_mask); } if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); out: dev->is_probed = 0; cpu_hotplug_enable(); return error; } /** * __pci_device_probe - check if a driver wants to claim a specific PCI device * @drv: driver to call to check if it wants the PCI device * @pci_dev: PCI device being probed * * returns 0 on success, else error. * side-effect: pci_dev->driver is set to drv when drv claims pci_dev. 
*/ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) { const struct pci_device_id *id; int error = 0; if (drv->probe) { error = -ENODEV; id = pci_match_device(drv, pci_dev); if (id) error = pci_call_probe(drv, pci_dev, id); } return error; } #ifdef CONFIG_PCI_IOV static inline bool pci_device_can_probe(struct pci_dev *pdev) { return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe || pdev->driver_override); } #else static inline bool pci_device_can_probe(struct pci_dev *pdev) { return true; } #endif static int pci_device_probe(struct device *dev) { int error; struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = to_pci_driver(dev->driver); if (!pci_device_can_probe(pci_dev)) return -ENODEV; pci_assign_irq(pci_dev); error = pcibios_alloc_irq(pci_dev); if (error < 0) return error; pci_dev_get(pci_dev); error = __pci_device_probe(drv, pci_dev); if (error) { pcibios_free_irq(pci_dev); pci_dev_put(pci_dev); } return error; } static void pci_device_remove(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; if (drv->remove) { pm_runtime_get_sync(dev); /* * If the driver provides a .runtime_idle() callback and it has * started to run already, it may continue to run in parallel * with the code below, so wait until all of the runtime PM * activity has completed. */ pm_runtime_barrier(dev); drv->remove(pci_dev); pm_runtime_put_noidle(dev); } pcibios_free_irq(pci_dev); pci_dev->driver = NULL; pci_iov_remove(pci_dev); /* Undo the runtime PM settings in local_pci_probe() */ pm_runtime_put_sync(dev); /* * If the device is still on, set the power state as "unknown", * since it might change by the next time we load the driver. 
*/ if (pci_dev->current_state == PCI_D0) pci_dev->current_state = PCI_UNKNOWN; /* * We would love to complain here if pci_dev->is_enabled is set, that * the driver should have called pci_disable_device(), but the * unfortunate fact is there are too many odd BIOS and bridge setups * that don't like drivers doing that all of the time. * Oh well, we can dream of sane hardware when we sleep, no matter how * horrible the crap we have to deal with is when we are awake... */ pci_dev_put(pci_dev); } static void pci_device_shutdown(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *drv = pci_dev->driver; pm_runtime_resume(dev); if (drv && drv->shutdown) drv->shutdown(pci_dev); /* * If this is a kexec reboot, turn off Bus Master bit on the * device to tell it to not continue to do DMA. Don't touch * devices in D3cold or unknown states. * If it is not a kexec reboot, firmware will hit the PCI * devices with big hammer and stop their DMA any way. */ if (kexec_in_progress && (pci_dev->current_state <= PCI_D3hot)) pci_clear_master(pci_dev); } #ifdef CONFIG_PM_SLEEP /* Auxiliary functions used for system resume */ /** * pci_restore_standard_config - restore standard config registers of PCI device * @pci_dev: PCI device to handle */ static int pci_restore_standard_config(struct pci_dev *pci_dev) { pci_update_current_state(pci_dev, PCI_UNKNOWN); if (pci_dev->current_state != PCI_D0) { int error = pci_set_power_state(pci_dev, PCI_D0); if (error) return error; } pci_restore_state(pci_dev); pci_pme_restore(pci_dev); return 0; } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM /* Auxiliary functions used for system resume and run-time resume */ static void pci_pm_default_resume(struct pci_dev *pci_dev) { pci_fixup_device(pci_fixup_resume, pci_dev); pci_enable_wake(pci_dev, PCI_D0, false); } static void pci_pm_default_resume_early(struct pci_dev *pci_dev) { pci_pm_power_up_and_verify_state(pci_dev); pci_restore_state(pci_dev); pci_pme_restore(pci_dev); } 
static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev) { int ret; ret = pci_bridge_wait_for_secondary_bus(pci_dev, "resume"); if (ret) { /* * The downstream link failed to come up, so mark the * devices below as disconnected to make sure we don't * attempt to resume them. */ pci_walk_bus(pci_dev->subordinate, pci_dev_set_disconnected, NULL); return; } /* * When powering on a bridge from D3cold, the whole hierarchy may be * powered on into D0uninitialized state, resume them to give them a * chance to suspend again */ pci_resume_bus(pci_dev->subordinate); } #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP /* * Default "suspend" method for devices that have no driver provided suspend, * or not even a driver at all (second part). */ static void pci_pm_set_unknown_state(struct pci_dev *pci_dev) { /* * mark its power state as "unknown", since we don't know if * e.g. the BIOS will change its device state when we suspend. */ if (pci_dev->current_state == PCI_D0) pci_dev->current_state = PCI_UNKNOWN; } /* * Default "resume" method for devices that have no driver provided resume, * or not even a driver at all (second part). 
*/
static int pci_pm_reenable_device(struct pci_dev *pci_dev)
{
	int retval;

	/* if the device was enabled before suspend, re-enable */
	retval = pci_reenable_device(pci_dev);
	/*
	 * if the device was busmaster before the suspend, make it busmaster
	 * again
	 */
	if (pci_dev->is_busmaster)
		pci_set_master(pci_dev);

	return retval;
}

/*
 * Legacy suspend path: call the bound driver's ->suspend(pci_dev, state)
 * if it has one, then run the pci_fixup_suspend fixups.  Returns the
 * driver's error, or 0.
 */
static int pci_legacy_suspend(struct device *dev, pm_message_t state)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct pci_driver *drv = pci_dev->driver;

	if (drv && drv->suspend) {
		pci_power_t prev = pci_dev->current_state;
		int error;

		error = drv->suspend(pci_dev, state);
		suspend_report_result(dev, drv->suspend, error);
		if (error)
			return error;

		/*
		 * State was not saved and the device is neither in D0 nor in
		 * the unknown state: warn once if the callback changed the
		 * power state.
		 */
		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
		    && pci_dev->current_state != PCI_UNKNOWN) {
			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
				      "PCI PM: Device state not saved by %pS\n",
				      drv->suspend);
		}
	}

	pci_fixup_device(pci_fixup_suspend, pci_dev);

	return 0;
}

/*
 * Legacy late-suspend path: save config state unless already saved, mark a
 * D0 device's state as unknown and run the pci_fixup_suspend_late fixups.
 */
static int pci_legacy_suspend_late(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);

	if (!pci_dev->state_saved)
		pci_save_state(pci_dev);

	pci_pm_set_unknown_state(pci_dev);

	pci_fixup_device(pci_fixup_suspend_late, pci_dev);

	return 0;
}

/*
 * Legacy resume path: run the pci_fixup_resume fixups, then call the
 * driver's ->resume() if present, otherwise re-enable the device.
 */
static int pci_legacy_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct pci_driver *drv = pci_dev->driver;

	pci_fixup_device(pci_fixup_resume, pci_dev);

	return drv && drv->resume ?
			drv->resume(pci_dev) : pci_pm_reenable_device(pci_dev);
}

/* Auxiliary functions used by the new power management framework */

static void pci_pm_default_suspend(struct pci_dev *pci_dev)
{
	/* Disable non-bridge devices without PM support */
	if (!pci_has_subordinate(pci_dev))
		pci_disable_enabled_device(pci_dev);
}

/*
 * True if the bound driver provides a legacy ->suspend() or ->resume()
 * callback.
 */
static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
{
	struct pci_driver *drv = pci_dev->driver;
	bool ret = drv && (drv->suspend || drv->resume);

	/*
	 * Legacy PM support is used by default, so warn if the new framework is
	 * supported as well.  Drivers are supposed to support either the
	 * former, or the latter, but not both at the same time.
	 */
	pci_WARN(pci_dev, ret && drv->driver.pm, "device %04x:%04x\n",
		 pci_dev->vendor, pci_dev->device);

	return ret;
}

/* New power management framework */

/*
 * ->prepare() callback.  Returns a negative driver error, 0, or 1 (after
 * adjusting PME for the direct-complete case, see below).
 */
static int pci_pm_prepare(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	dev_pm_set_strict_midlayer(dev, true);

	if (pm && pm->prepare) {
		int error = pm->prepare(dev);
		if (error < 0)
			return error;

		/*
		 * With DPM_FLAG_SMART_PREPARE set, a 0 from the driver is
		 * returned as is.
		 */
		if (!error && dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_PREPARE))
			return 0;
	}
	if (pci_dev_need_resume(pci_dev))
		return 0;

	/*
	 * The PME setting needs to be adjusted here in case the direct-complete
	 * optimization is used with respect to this device.
	 */
	pci_dev_adjust_pme(pci_dev);
	return 1;
}

/* ->complete() callback, counterpart of pci_pm_prepare(). */
static void pci_pm_complete(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);

	pci_dev_complete_resume(pci_dev);
	pm_generic_complete(dev);

	/* Resume device if platform firmware has put it in reset-power-on */
	if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) {
		pci_power_t pre_sleep_state = pci_dev->current_state;

		pci_refresh_power_state(pci_dev);
		/*
		 * On platforms with ACPI this check may also trigger for
		 * devices sharing power resources if one of those power
		 * resources has been activated as a result of a change of the
		 * power state of another device sharing it.  However, in that
		 * case it is also better to resume the device, in general.
		 */
		if (pci_dev->current_state < pre_sleep_state)
			pm_request_resume(dev);
	}

	dev_pm_set_strict_midlayer(dev, false);
}

#else /* !CONFIG_PM_SLEEP */

#define pci_pm_prepare NULL
#define pci_pm_complete NULL

#endif /* !CONFIG_PM_SLEEP */

#ifdef CONFIG_SUSPEND
static void pcie_pme_root_status_cleanup(struct pci_dev *pci_dev)
{
	/*
	 * Some BIOSes forget to clear Root PME Status bits after system
	 * wakeup, which breaks ACPI-based runtime wakeup on PCI Express.
	 * Clear those bits now just in case (shouldn't hurt).
	 */
	if (pci_is_pcie(pci_dev) &&
	    (pci_pcie_type(pci_dev) == PCI_EXP_TYPE_ROOT_PORT ||
	     pci_pcie_type(pci_dev) == PCI_EXP_TYPE_RC_EC))
		pcie_clear_root_pme_status(pci_dev);
}

/*
 * ->suspend() callback.  Uses the legacy path when the driver has legacy
 * callbacks, the default suspend when the driver has no dev_pm_ops, and
 * otherwise the driver's pm->suspend().
 */
static int pci_pm_suspend(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	pci_dev->skip_bus_pm = false;

	/*
	 * Disabling PTM allows some systems, e.g., Intel mobile chips
	 * since Coffee Lake, to enter a lower-power PM state.
	 */
	pci_suspend_ptm(pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend(dev, PMSG_SUSPEND);

	if (!pm) {
		pci_pm_default_suspend(pci_dev);
		return 0;
	}

	/*
	 * PCI devices suspended at run time may need to be resumed at this
	 * point, because in general it may be necessary to reconfigure them for
	 * system suspend.  Namely, if the device is expected to wake up the
	 * system from the sleep state, it may have to be reconfigured for this
	 * purpose, or if the device is not expected to wake up the system from
	 * the sleep state, it should be prevented from signaling wakeup events
	 * going forward.
	 *
	 * Also if the driver of the device does not indicate that its system
	 * suspend callbacks can cope with runtime-suspended devices, it is
	 * better to resume the device from runtime suspend here.
	 */
	if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) {
		pm_runtime_resume(dev);
		pci_dev->state_saved = false;
	} else {
		pci_dev_adjust_pme(pci_dev);
	}

	if (pm->suspend) {
		pci_power_t prev = pci_dev->current_state;
		int error;

		error = pm->suspend(dev);
		suspend_report_result(dev, pm->suspend, error);
		if (error)
			return error;

		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
		    && pci_dev->current_state != PCI_UNKNOWN) {
			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
				      "PCI PM: State of device not saved by %pS\n",
				      pm->suspend);
		}
	}

	return 0;
}

/*
 * ->suspend_late() callback: unless suspend is skipped for @dev, run the
 * pci_fixup_suspend fixups and the generic late-suspend handler.
 */
static int pci_pm_suspend_late(struct device *dev)
{
	if (dev_pm_skip_suspend(dev))
		return 0;

	pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));

	return pm_generic_suspend_late(dev);
}

/*
 * ->suspend_noirq() callback.  All paths that reach the Fixup label run
 * the pci_fixup_suspend_late fixups and the may_skip_resume check before
 * returning 0; driver errors are returned directly.
 */
static int pci_pm_suspend_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	if (dev_pm_skip_suspend(dev))
		return 0;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend_late(dev);

	if (!pm) {
		pci_save_state(pci_dev);
		goto Fixup;
	}

	if (pm->suspend_noirq) {
		pci_power_t prev = pci_dev->current_state;
		int error;

		error = pm->suspend_noirq(dev);
		suspend_report_result(dev, pm->suspend_noirq, error);
		if (error)
			return error;

		if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
		    && pci_dev->current_state != PCI_UNKNOWN) {
			pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
				      "PCI PM: State of device not saved by %pS\n",
				      pm->suspend_noirq);
			goto Fixup;
		}
	}

	if (!pci_dev->state_saved) {
		pci_save_state(pci_dev);

		/*
		 * If the device is a bridge with a child in D0 below it,
		 * it needs to stay in D0, so check skip_bus_pm to avoid
		 * putting it into a low-power state in that case.
		 */
		if (!pci_dev->skip_bus_pm && pci_power_manageable(pci_dev))
			pci_prepare_to_sleep(pci_dev);
	}

	pci_dbg(pci_dev, "PCI PM: Suspend power state: %s\n",
		pci_power_name(pci_dev->current_state));

	if (pci_dev->current_state == PCI_D0) {
		pci_dev->skip_bus_pm = true;
		/*
		 * Per PCI PM r1.2, table 6-1, a bridge must be in D0 if any
		 * downstream device is in D0, so avoid changing the power state
		 * of the parent bridge by setting the skip_bus_pm flag for it.
		 */
		if (pci_dev->bus->self)
			pci_dev->bus->self->skip_bus_pm = true;
	}

	if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) {
		pci_dbg(pci_dev, "PCI PM: Skipped\n");
		goto Fixup;
	}

	pci_pm_set_unknown_state(pci_dev);

	/*
	 * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's
	 * PCI COMMAND register isn't 0, the BIOS assumes that the controller
	 * hasn't been quiesced and tries to turn it off.  If the controller
	 * is already in D3, this can hang or cause memory corruption.
	 *
	 * Since the value of the COMMAND register doesn't matter once the
	 * device has been suspended, we can safely set it to 0 here.
	 */
	if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		pci_write_config_word(pci_dev, PCI_COMMAND, 0);

Fixup:
	pci_fixup_device(pci_fixup_suspend_late, pci_dev);

	/*
	 * If the target system sleep state is suspend-to-idle, it is sufficient
	 * to check whether or not the device's wakeup settings are good for
	 * runtime PM.  Otherwise, the pm_resume_via_firmware() check will cause
	 * pci_pm_complete() to take care of fixing up the device's state
	 * anyway, if need be.
	 */
	if (device_can_wakeup(dev) && !device_may_wakeup(dev))
		dev->power.may_skip_resume = false;

	return 0;
}

/*
 * ->resume_noirq() callback.  prev_state and skip_bus_pm are sampled before
 * any resume work so later decisions use the values left by suspend.
 */
static int pci_pm_resume_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	pci_power_t prev_state = pci_dev->current_state;
	bool skip_bus_pm = pci_dev->skip_bus_pm;

	if (dev_pm_skip_resume(dev))
		return 0;

	/*
	 * In the suspend-to-idle case, devices left in D0 during suspend will
	 * stay in D0, so it is not necessary to restore or update their
	 * configuration here and attempting to put them into D0 again is
	 * pointless, so avoid doing that.
	 */
	if (!(skip_bus_pm && pm_suspend_no_platform()))
		pci_pm_default_resume_early(pci_dev);

	pci_fixup_device(pci_fixup_resume_early, pci_dev);
	pcie_pme_root_status_cleanup(pci_dev);

	if (!skip_bus_pm && prev_state == PCI_D3cold)
		pci_pm_bridge_power_up_actions(pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return 0;

	if (pm && pm->resume_noirq)
		return pm->resume_noirq(dev);

	return 0;
}

/* ->resume_early() callback: generic handler unless resume is skipped. */
static int pci_pm_resume_early(struct device *dev)
{
	if (dev_pm_skip_resume(dev))
		return 0;

	return pm_generic_resume_early(dev);
}

/*
 * ->resume() callback.  Uses the legacy resume path for legacy drivers;
 * otherwise runs the default resume and then the driver's pm->resume(),
 * or re-enables the device when the driver has no dev_pm_ops.
 */
static int pci_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	/*
	 * This is necessary for the suspend error path in which resume is
	 * called without restoring the standard config registers of the device.
	 */
	if (pci_dev->state_saved)
		pci_restore_standard_config(pci_dev);

	pci_resume_ptm(pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_resume(dev);

	pci_pm_default_resume(pci_dev);

	if (pm) {
		if (pm->resume)
			return pm->resume(dev);
	} else {
		pci_pm_reenable_device(pci_dev);
	}

	return 0;
}

#else /* !CONFIG_SUSPEND */

#define pci_pm_suspend NULL
#define pci_pm_suspend_late NULL
#define pci_pm_suspend_noirq NULL
#define pci_pm_resume NULL
#define pci_pm_resume_early NULL
#define pci_pm_resume_noirq NULL

#endif /* !CONFIG_SUSPEND */

#ifdef CONFIG_HIBERNATE_CALLBACKS

/*
 * ->freeze() callback: legacy suspend with PMSG_FREEZE for legacy drivers,
 * default suspend without dev_pm_ops, otherwise the driver's pm->freeze().
 */
static int pci_pm_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend(dev, PMSG_FREEZE);

	if (!pm) {
		pci_pm_default_suspend(pci_dev);
		return 0;
	}

	/*
	 * Resume all runtime-suspended devices before creating a snapshot
	 * image of system memory, because the restore kernel generally cannot
	 * be expected to always handle them consistently and they need to be
	 * put into the runtime-active metastate during system resume anyway,
	 * so it is better to ensure that the state saved in the image will be
	 * always consistent with that.
	 */
	pm_runtime_resume(dev);
	pci_dev->state_saved = false;

	if (pm->freeze) {
		int error;

		error = pm->freeze(dev);
		suspend_report_result(dev, pm->freeze, error);
		if (error)
			return error;
	}

	return 0;
}

/*
 * ->freeze_noirq() callback: after the driver's pm->freeze_noirq() (if
 * any), save config state unless already saved and mark a D0 device's
 * state as unknown.
 */
static int pci_pm_freeze_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend_late(dev);

	if (pm && pm->freeze_noirq) {
		int error;

		error = pm->freeze_noirq(dev);
		suspend_report_result(dev, pm->freeze_noirq, error);
		if (error)
			return error;
	}

	if (!pci_dev->state_saved)
		pci_save_state(pci_dev);

	pci_pm_set_unknown_state(pci_dev);

	return 0;
}

/* ->thaw_noirq() callback. */
static int pci_pm_thaw_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	/*
	 * The pm->thaw_noirq() callback assumes the device has been
	 * returned to D0 and its config state has been restored.
	 *
	 * In addition, pci_restore_state() restores MSI-X state in MMIO
	 * space, which requires the device to be in D0, so return it to D0
	 * in case the driver's "freeze" callbacks put it into a low-power
	 * state.
	 */
	pci_pm_power_up_and_verify_state(pci_dev);
	pci_restore_state(pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return 0;

	if (pm && pm->thaw_noirq)
		return pm->thaw_noirq(dev);

	return 0;
}

/*
 * ->thaw() callback: legacy resume for legacy drivers; otherwise the
 * driver's pm->thaw(), or re-enable the device when it has no dev_pm_ops.
 * state_saved is cleared on the non-legacy path.
 */
static int pci_pm_thaw(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	int error = 0;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_resume(dev);

	if (pm) {
		if (pm->thaw)
			error = pm->thaw(dev);
	} else {
		pci_pm_reenable_device(pci_dev);
	}

	pci_dev->state_saved = false;

	return error;
}

/*
 * ->poweroff() callback: legacy suspend with PMSG_HIBERNATE for legacy
 * drivers, default suspend without dev_pm_ops, otherwise the driver's
 * pm->poweroff().
 */
static int pci_pm_poweroff(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend(dev, PMSG_HIBERNATE);

	if (!pm) {
		pci_pm_default_suspend(pci_dev);
		return 0;
	}

	/* The reason to do that is the same as in pci_pm_suspend(). */
	if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) {
		pm_runtime_resume(dev);
		pci_dev->state_saved = false;
	} else {
		pci_dev_adjust_pme(pci_dev);
	}

	if (pm->poweroff) {
		int error;

		error = pm->poweroff(dev);
		suspend_report_result(dev, pm->poweroff, error);
		if (error)
			return error;
	}

	return 0;
}

/*
 * ->poweroff_late() callback: unless suspend is skipped for @dev, run the
 * pci_fixup_suspend fixups and the generic late-poweroff handler.
 */
static int pci_pm_poweroff_late(struct device *dev)
{
	if (dev_pm_skip_suspend(dev))
		return 0;

	pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev));

	return pm_generic_poweroff_late(dev);
}

/* ->poweroff_noirq() callback. */
static int pci_pm_poweroff_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	if (dev_pm_skip_suspend(dev))
		return 0;

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_suspend_late(dev);

	if (!pm) {
		pci_fixup_device(pci_fixup_suspend_late, pci_dev);
		return 0;
	}

	if (pm->poweroff_noirq) {
		int error;

		error = pm->poweroff_noirq(dev);
		suspend_report_result(dev, pm->poweroff_noirq, error);
		if (error)
			return error;
	}

	/* Only non-bridge devices whose state was not saved are put to sleep */
	if (!pci_dev->state_saved && !pci_has_subordinate(pci_dev))
		pci_prepare_to_sleep(pci_dev);

	/*
	 * The reason for doing this here is the same as for the analogous code
	 * in pci_pm_suspend_noirq().
	 */
	if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		pci_write_config_word(pci_dev, PCI_COMMAND, 0);

	pci_fixup_device(pci_fixup_suspend_late, pci_dev);

	return 0;
}

/*
 * ->restore_noirq() callback: default early resume and early-resume
 * fixups, then the driver's pm->restore_noirq() for non-legacy drivers.
 */
static int pci_pm_restore_noirq(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	pci_pm_default_resume_early(pci_dev);
	pci_fixup_device(pci_fixup_resume_early, pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return 0;

	if (pm && pm->restore_noirq)
		return pm->restore_noirq(dev);

	return 0;
}

/*
 * ->restore() callback: legacy resume for legacy drivers; otherwise the
 * default resume followed by the driver's pm->restore(), or re-enabling
 * the device when the driver has no dev_pm_ops.
 */
static int pci_pm_restore(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	/*
	 * This is necessary for the hibernation error path in which restore is
	 * called without restoring the standard config registers of the device.
	 */
	if (pci_dev->state_saved)
		pci_restore_standard_config(pci_dev);

	if (pci_has_legacy_pm_support(pci_dev))
		return pci_legacy_resume(dev);

	pci_pm_default_resume(pci_dev);

	if (pm) {
		if (pm->restore)
			return pm->restore(dev);
	} else {
		pci_pm_reenable_device(pci_dev);
	}

	return 0;
}

#else /* !CONFIG_HIBERNATE_CALLBACKS */

#define pci_pm_freeze NULL
#define pci_pm_freeze_noirq NULL
#define pci_pm_thaw NULL
#define pci_pm_thaw_noirq NULL
#define pci_pm_poweroff NULL
#define pci_pm_poweroff_late NULL
#define pci_pm_poweroff_noirq NULL
#define pci_pm_restore NULL
#define pci_pm_restore_noirq NULL

#endif /* !CONFIG_HIBERNATE_CALLBACKS */

#ifdef CONFIG_PM

/*
 * ->runtime_suspend() callback.  Returns the driver's error if its
 * pm->runtime_suspend() fails, otherwise 0.
 */
static int pci_pm_runtime_suspend(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	pci_power_t prev = pci_dev->current_state;
	int error;

	pci_suspend_ptm(pci_dev);

	/*
	 * If pci_dev->driver is not set (unbound), we leave the device in D0,
	 * but it may go to D3cold when the bridge above it runtime suspends.
	 * Save its config space in case that happens.
	 */
	if (!pci_dev->driver) {
		pci_save_state(pci_dev);
		return 0;
	}

	pci_dev->state_saved = false;
	if (pm && pm->runtime_suspend) {
		error = pm->runtime_suspend(dev);
		/*
		 * -EBUSY and -EAGAIN is used to request the runtime PM core
		 * to schedule a new suspend, so log the event only with debug
		 * log level.
		 */
		if (error == -EBUSY || error == -EAGAIN) {
			pci_dbg(pci_dev, "can't suspend now (%ps returned %d)\n",
				pm->runtime_suspend, error);
			return error;
		} else if (error) {
			pci_err(pci_dev, "can't suspend (%ps returned %d)\n",
				pm->runtime_suspend, error);
			return error;
		}
	}

	pci_fixup_device(pci_fixup_suspend, pci_dev);

	if (pm && pm->runtime_suspend
	    && !pci_dev->state_saved && pci_dev->current_state != PCI_D0
	    && pci_dev->current_state != PCI_UNKNOWN) {
		pci_WARN_ONCE(pci_dev, pci_dev->current_state != prev,
			      "PCI PM: State of device not saved by %pS\n",
			      pm->runtime_suspend);
		return 0;
	}

	if (!pci_dev->state_saved) {
		pci_save_state(pci_dev);
		pci_finish_runtime_suspend(pci_dev);
	}

	return 0;
}

/*
 * ->runtime_resume() callback.  prev_state is sampled before the device is
 * powered up so the D3cold check below sees the pre-resume state.
 */
static int pci_pm_runtime_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
	pci_power_t prev_state = pci_dev->current_state;
	int error = 0;

	/*
	 * Restoring config space is necessary even if the device is not bound
	 * to a driver because although we left it in D0, it may have gone to
	 * D3cold when the bridge above it runtime suspended.
	 */
	pci_pm_default_resume_early(pci_dev);
	pci_resume_ptm(pci_dev);

	if (!pci_dev->driver)
		return 0;

	pci_fixup_device(pci_fixup_resume_early, pci_dev);
	pci_pm_default_resume(pci_dev);

	if (prev_state == PCI_D3cold)
		pci_pm_bridge_power_up_actions(pci_dev);

	if (pm && pm->runtime_resume)
		error = pm->runtime_resume(dev);

	return error;
}

/* ->runtime_idle() callback: defer to the driver's pm->runtime_idle(). */
static int pci_pm_runtime_idle(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;

	/*
	 * If pci_dev->driver is not set (unbound), the device should
	 * always remain in D0 regardless of the runtime PM status
	 */
	if (!pci_dev->driver)
		return 0;

	if (pm && pm->runtime_idle)
		return pm->runtime_idle(dev);

	return 0;
}

/*
 * PM operations installed on the PCI bus type via PCI_PM_OPS_PTR.
 * Callbacks compiled out by their config option are defined as NULL above.
 */
static const struct dev_pm_ops pci_dev_pm_ops = {
	.prepare = pci_pm_prepare,
	.complete = pci_pm_complete,
	.suspend = pci_pm_suspend,
	.suspend_late = pci_pm_suspend_late,
	.resume = pci_pm_resume,
	.resume_early = pci_pm_resume_early,
	.freeze = pci_pm_freeze,
	.thaw = pci_pm_thaw,
	.poweroff = pci_pm_poweroff,
	.poweroff_late = pci_pm_poweroff_late,
	.restore = pci_pm_restore,
	.suspend_noirq = pci_pm_suspend_noirq,
	.resume_noirq = pci_pm_resume_noirq,
	.freeze_noirq = pci_pm_freeze_noirq,
	.thaw_noirq = pci_pm_thaw_noirq,
	.poweroff_noirq = pci_pm_poweroff_noirq,
	.restore_noirq = pci_pm_restore_noirq,
	.runtime_suspend = pci_pm_runtime_suspend,
	.runtime_resume = pci_pm_runtime_resume,
	.runtime_idle = pci_pm_runtime_idle,
};

#define PCI_PM_OPS_PTR (&pci_dev_pm_ops)

#else /* !CONFIG_PM */

#define pci_pm_runtime_suspend NULL
#define pci_pm_runtime_resume NULL
#define pci_pm_runtime_idle NULL

#define PCI_PM_OPS_PTR NULL

#endif /* !CONFIG_PM */

/**
 * __pci_register_driver - register a new pci driver
 * @drv: the driver structure to register
 * @owner: owner module of drv
 * @mod_name: module name string
 *
 * Adds the driver structure to the list of registered drivers.
 * Returns a negative value on error, otherwise 0.
 * If no error occurred, the driver remains registered even if
 * no device was claimed during registration.
*/ int __pci_register_driver(struct pci_driver *drv, struct module *owner, const char *mod_name) { /* initialize common driver fields */ drv->driver.name = drv->name; drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.mod_name = mod_name; drv->driver.groups = drv->groups; drv->driver.dev_groups = drv->dev_groups; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); /* register with core */ return driver_register(&drv->driver); } EXPORT_SYMBOL(__pci_register_driver); /** * pci_unregister_driver - unregister a pci driver * @drv: the driver structure to unregister * * Deletes the driver structure from the list of registered PCI drivers, * gives it a chance to clean up by calling its remove() function for * each device it was responsible for, and marks those devices as * driverless. */ void pci_unregister_driver(struct pci_driver *drv) { driver_unregister(&drv->driver); pci_free_dynids(drv); } EXPORT_SYMBOL(pci_unregister_driver); static struct pci_driver pci_compat_driver = { .name = "compat" }; /** * pci_dev_driver - get the pci_driver of a device * @dev: the device to query * * Returns the appropriate pci_driver structure or %NULL if there is no * registered driver for the device. */ struct pci_driver *pci_dev_driver(const struct pci_dev *dev) { int i; if (dev->driver) return dev->driver; for (i = 0; i <= PCI_ROM_RESOURCE; i++) if (dev->resource[i].flags & IORESOURCE_BUSY) return &pci_compat_driver; return NULL; } EXPORT_SYMBOL(pci_dev_driver); /** * pci_bus_match - Tell if a PCI device structure has a matching PCI device id structure * @dev: the PCI device structure to match against * @drv: the device driver to search for matching PCI device id structures * * Used by a driver to check whether a PCI device present in the * system is in its list of supported devices. Returns the matching * pci_device_id structure or %NULL if there is no match. 
*/ static int pci_bus_match(struct device *dev, const struct device_driver *drv) { struct pci_dev *pci_dev = to_pci_dev(dev); struct pci_driver *pci_drv; const struct pci_device_id *found_id; if (pci_dev_binding_disallowed(pci_dev)) return 0; pci_drv = (struct pci_driver *)to_pci_driver(drv); found_id = pci_match_device(pci_drv, pci_dev); if (found_id) return 1; return 0; } /** * pci_dev_get - increments the reference count of the pci device structure * @dev: the device being referenced * * Each live reference to a device should be refcounted. * * Drivers for PCI devices should normally record such references in * their probe() methods, when they bind to a device, and release * them by calling pci_dev_put(), in their disconnect() methods. * * A pointer to the device with the incremented reference counter is returned. */ struct pci_dev *pci_dev_get(struct pci_dev *dev) { if (dev) get_device(&dev->dev); return dev; } EXPORT_SYMBOL(pci_dev_get); /** * pci_dev_put - release a use of the pci device structure * @dev: device that's been disconnected * * Must be called when a user of a device is finished with it. When the last * user of the device calls this function, the memory of the device is freed. 
*/ void pci_dev_put(struct pci_dev *dev) { if (dev) put_device(&dev->dev); } EXPORT_SYMBOL(pci_dev_put); static int pci_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct pci_dev *pdev; if (!dev) return -ENODEV; pdev = to_pci_dev(dev); if (add_uevent_var(env, "PCI_CLASS=%04X", pdev->class)) return -ENOMEM; if (add_uevent_var(env, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device)) return -ENOMEM; if (add_uevent_var(env, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device)) return -ENOMEM; if (add_uevent_var(env, "PCI_SLOT_NAME=%s", pci_name(pdev))) return -ENOMEM; if (add_uevent_var(env, "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X", pdev->vendor, pdev->device, pdev->subsystem_vendor, pdev->subsystem_device, (u8)(pdev->class >> 16), (u8)(pdev->class >> 8), (u8)(pdev->class))) return -ENOMEM; return 0; } #if defined(CONFIG_PCIEAER) || defined(CONFIG_EEH) || defined(CONFIG_S390) /** * pci_uevent_ers - emit a uevent during recovery path of PCI device * @pdev: PCI device undergoing error recovery * @err_type: type of error event */ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type) { int idx = 0; char *envp[3]; switch (err_type) { case PCI_ERS_RESULT_NONE: case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_NEED_RESET: envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; envp[idx++] = "DEVICE_ONLINE=0"; break; case PCI_ERS_RESULT_RECOVERED: envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; envp[idx++] = "DEVICE_ONLINE=1"; break; case PCI_ERS_RESULT_DISCONNECT: envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; envp[idx++] = "DEVICE_ONLINE=0"; break; default: break; } if (idx > 0) { envp[idx++] = NULL; kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); } } #endif static int pci_bus_num_vf(struct device *dev) { return pci_num_vf(to_pci_dev(dev)); } /** * pci_dma_configure - Setup DMA configuration * @dev: ptr to dev structure * * Function to update PCI devices's DMA configuration using the same * info from 
the OF node or ACPI node of host bridge's parent (if any). */ static int pci_dma_configure(struct device *dev) { const struct device_driver *drv = READ_ONCE(dev->driver); struct device *bridge; int ret = 0; bridge = pci_get_host_bridge_device(to_pci_dev(dev)); if (IS_ENABLED(CONFIG_OF) && bridge->parent && bridge->parent->of_node) { ret = of_dma_configure(dev, bridge->parent->of_node, true); } else if (has_acpi_companion(bridge)) { struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev)); } pci_put_host_bridge_device(bridge); /* @drv may not be valid when we're called from the IOMMU layer */ if (!ret && drv && !to_pci_driver(drv)->driver_managed_dma) { ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); } return ret; } static void pci_dma_cleanup(struct device *dev) { struct pci_driver *driver = to_pci_driver(dev->driver); if (!driver->driver_managed_dma) iommu_device_unuse_default_domain(dev); } /* * pci_device_irq_get_affinity - get IRQ affinity mask for device * @dev: ptr to dev structure * @irq_vec: interrupt vector number * * Return the CPU affinity mask for @dev and @irq_vec. 
*/ static const struct cpumask *pci_device_irq_get_affinity(struct device *dev, unsigned int irq_vec) { return pci_irq_get_affinity(to_pci_dev(dev), irq_vec); } const struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, .uevent = pci_uevent, .probe = pci_device_probe, .remove = pci_device_remove, .shutdown = pci_device_shutdown, .irq_get_affinity = pci_device_irq_get_affinity, .dev_groups = pci_dev_groups, .bus_groups = pci_bus_groups, .drv_groups = pci_drv_groups, .pm = PCI_PM_OPS_PTR, .num_vf = pci_bus_num_vf, .dma_configure = pci_dma_configure, .dma_cleanup = pci_dma_cleanup, }; EXPORT_SYMBOL(pci_bus_type); #ifdef CONFIG_PCIEPORTBUS static int pcie_port_bus_match(struct device *dev, const struct device_driver *drv) { struct pcie_device *pciedev; const struct pcie_port_service_driver *driver; if (drv->bus != &pcie_port_bus_type || dev->bus != &pcie_port_bus_type) return 0; pciedev = to_pcie_device(dev); driver = to_service_driver(drv); if (driver->service != pciedev->service) return 0; if (driver->port_type != PCIE_ANY_PORT && driver->port_type != pci_pcie_type(pciedev->port)) return 0; return 1; } const struct bus_type pcie_port_bus_type = { .name = "pci_express", .match = pcie_port_bus_match, }; #endif static int __init pci_driver_init(void) { int ret; ret = bus_register(&pci_bus_type); if (ret) return ret; #ifdef CONFIG_PCIEPORTBUS ret = bus_register(&pcie_port_bus_type); if (ret) return ret; #endif dma_debug_add_bus(&pci_bus_type); return 0; } postcore_initcall(pci_driver_init);
7034 7034 7085 427 784 143 284 783 143 284 783 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Tracepoint definitions for the "net" trace system: transmit and receive
 * events keyed on an sk_buff and/or a net_device.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM net

#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NET_H

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/tracepoint.h>

/*
 * net_dev_start_xmit: records the device name together with queue mapping,
 * VLAN tag state, protocol, checksum mode, lengths, header offsets,
 * tx_flags and GSO parameters read from @skb.
 */
TRACE_EVENT(net_dev_start_xmit,

	TP_PROTO(const struct sk_buff *skb, const struct net_device *dev),

	TP_ARGS(skb, dev),

	TP_STRUCT__entry(
		__string(	name,			dev->name	)
		__field(	u16,			queue_mapping	)
		__field(	const void *,		skbaddr		)
		__field(	bool,			vlan_tagged	)
		__field(	u16,			vlan_proto	)
		__field(	u16,			vlan_tci	)
		__field(	u16,			protocol	)
		__field(	u8,			ip_summed	)
		__field(	unsigned int,		len		)
		__field(	unsigned int,		data_len	)
		__field(	int,			network_offset	)
		__field(	bool,			transport_offset_valid)
		__field(	int,			transport_offset)
		__field(	u8,			tx_flags	)
		__field(	u16,			gso_size	)
		__field(	u16,			gso_segs	)
		__field(	u16,			gso_type	)
	),

	TP_fast_assign(
		__assign_str(name);
		__entry->queue_mapping = skb->queue_mapping;
		__entry->skbaddr = skb;
		__entry->vlan_tagged = skb_vlan_tag_present(skb);
		/* vlan_proto and protocol are stored in host byte order. */
		__entry->vlan_proto = ntohs(skb->vlan_proto);
		__entry->vlan_tci = skb_vlan_tag_get(skb);
		__entry->protocol = ntohs(skb->protocol);
		__entry->ip_summed = skb->ip_summed;
		__entry->len = skb->len;
		__entry->data_len = skb->data_len;
		__entry->network_offset = skb_network_offset(skb);
		__entry->transport_offset_valid =
			skb_transport_header_was_set(skb);
		/* transport_offset is recorded as 0 when the header was never set. */
		__entry->transport_offset = skb_transport_header_was_set(skb) ?
			skb_transport_offset(skb) : 0;
		__entry->tx_flags = skb_shinfo(skb)->tx_flags;
		__entry->gso_size = skb_shinfo(skb)->gso_size;
		__entry->gso_segs = skb_shinfo(skb)->gso_segs;
		__entry->gso_type = skb_shinfo(skb)->gso_type;
	),

	TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x",
		  __get_str(name), __entry->queue_mapping, __entry->skbaddr,
		  __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci,
		  __entry->protocol, __entry->ip_summed, __entry->len,
		  __entry->data_len,
		  __entry->network_offset, __entry->transport_offset_valid,
		  __entry->transport_offset, __entry->tx_flags,
		  __entry->gso_size, __entry->gso_segs, __entry->gso_type)
);

/*
 * net_dev_xmit: records the skb address, the caller-supplied @skb_len and
 * return code @rc, and the device name.  The length comes from the
 * @skb_len argument; @skb itself is not dereferenced here.
 */
TRACE_EVENT(net_dev_xmit,

	TP_PROTO(struct sk_buff *skb,
		 int rc,
		 struct net_device *dev,
		 unsigned int skb_len),

	TP_ARGS(skb, rc, dev, skb_len),

	TP_STRUCT__entry(
		__field(	void *,		skbaddr		)
		__field(	unsigned int,	len		)
		__field(	int,		rc		)
		__string(	name,		dev->name	)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->len = skb_len;
		__entry->rc = rc;
		__assign_str(name);
	),

	TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
		__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
);

/*
 * net_dev_xmit_timeout: records the device name, the driver name returned
 * by netdev_drivername() and the queue index.
 */
TRACE_EVENT(net_dev_xmit_timeout,

	TP_PROTO(struct net_device *dev,
		 int queue_index),

	TP_ARGS(dev, queue_index),

	TP_STRUCT__entry(
		__string(	name,		dev->name	)
		__string(	driver,		netdev_drivername(dev))
		__field(	int,		queue_index	)
	),

	TP_fast_assign(
		__assign_str(name);
		__assign_str(driver);
		__entry->queue_index = queue_index;
	),

	TP_printk("dev=%s driver=%s queue=%d",
		__get_str(name), __get_str(driver), __entry->queue_index)
);

/*
 * net_dev_template: minimal per-skb event class recording the skb address,
 * skb->len and the name of skb->dev.
 */
DECLARE_EVENT_CLASS(net_dev_template,

	TP_PROTO(struct sk_buff *skb),

	TP_ARGS(skb),

	TP_STRUCT__entry(
		__field(	void *,		skbaddr		)
		__field(	unsigned int,	len		)
		__string(	name,		skb->dev->name	)
	),

	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->len = skb->len;
		__assign_str(name);
	),

	TP_printk("dev=%s skbaddr=%p len=%u",
		__get_str(name), __entry->skbaddr, __entry->len)
)

/* Events sharing the net_dev_template layout. */
DEFINE_EVENT(net_dev_template, net_dev_queue,

	TP_PROTO(struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_template, netif_receive_skb,

	TP_PROTO(struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_template, netif_rx,

	TP_PROTO(struct sk_buff *skb),

	TP_ARGS(skb)
);

/*
 * net_dev_rx_verbose_template: detailed per-skb event class recording
 * device name, NAPI id, queue mapping, VLAN state, protocol, checksum mode,
 * hash, lengths, truesize, MAC header position, fragment count and GSO
 * parameters.
 */
DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb),

	TP_STRUCT__entry(
		__string(	name,			skb->dev->name	)
		__field(	unsigned int,		napi_id		)
		__field(	u16,			queue_mapping	)
		__field(	const void *,		skbaddr		)
		__field(	bool,			vlan_tagged	)
		__field(	u16,			vlan_proto	)
		__field(	u16,			vlan_tci	)
		__field(	u16,			protocol	)
		__field(	u8,			ip_summed	)
		__field(	u32,			hash		)
		__field(	bool,			l4_hash		)
		__field(	unsigned int,		len		)
		__field(	unsigned int,		data_len	)
		__field(	unsigned int,		truesize	)
		__field(	bool,			mac_header_valid)
		__field(	int,			mac_header	)
		__field(	unsigned char,		nr_frags	)
		__field(	u16,			gso_size	)
		__field(	u16,			gso_type	)
	),

	TP_fast_assign(
		__assign_str(name);
		/* napi_id is recorded as 0 when busy polling is not configured. */
#ifdef CONFIG_NET_RX_BUSY_POLL
		__entry->napi_id = skb->napi_id;
#else
		__entry->napi_id = 0;
#endif
		__entry->queue_mapping = skb->queue_mapping;
		__entry->skbaddr = skb;
		__entry->vlan_tagged = skb_vlan_tag_present(skb);
		__entry->vlan_proto = ntohs(skb->vlan_proto);
		__entry->vlan_tci = skb_vlan_tag_get(skb);
		__entry->protocol = ntohs(skb->protocol);
		__entry->ip_summed = skb->ip_summed;
		__entry->hash = skb->hash;
		__entry->l4_hash = skb->l4_hash;
		__entry->len = skb->len;
		__entry->data_len = skb->data_len;
		__entry->truesize = skb->truesize;
		__entry->mac_header_valid = skb_mac_header_was_set(skb);
		/*
		 * Offset of the MAC header relative to skb->data; computed
		 * regardless of mac_header_valid, so read both together.
		 */
		__entry->mac_header = skb_mac_header(skb) - skb->data;
		__entry->nr_frags = skb_shinfo(skb)->nr_frags;
		__entry->gso_size = skb_shinfo(skb)->gso_size;
		__entry->gso_type = skb_shinfo(skb)->gso_type;
	),

	TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x",
		  __get_str(name), __entry->napi_id, __entry->queue_mapping,
		  __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto,
		  __entry->vlan_tci, __entry->protocol, __entry->ip_summed,
		  __entry->hash, __entry->l4_hash, __entry->len,
		  __entry->data_len, __entry->truesize,
		  __entry->mac_header_valid, __entry->mac_header,
		  __entry->nr_frags, __entry->gso_size, __entry->gso_type)
);

/* Entry events sharing the verbose receive layout. */
DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_frags_entry,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_receive_entry,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb)
);

DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry,

	TP_PROTO(const struct sk_buff *skb),

	TP_ARGS(skb)
);

/* net_dev_rx_exit_template: event class recording a single integer result. */
DECLARE_EVENT_CLASS(net_dev_rx_exit_template,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(int,	ret)
	),

	TP_fast_assign(
		__entry->ret = ret;
	),

	TP_printk("ret=%d", __entry->ret)
);

/* Exit events sharing the single-integer layout. */
DEFINE_EVENT(net_dev_rx_exit_template, napi_gro_frags_exit,

	TP_PROTO(int ret),

	TP_ARGS(ret)
);

DEFINE_EVENT(net_dev_rx_exit_template, napi_gro_receive_exit,

	TP_PROTO(int ret),

	TP_ARGS(ret)
);

DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_exit,

	TP_PROTO(int ret),

	TP_ARGS(ret)
);

DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_exit,

	TP_PROTO(int ret),

	TP_ARGS(ret)
);

DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_list_exit,

	TP_PROTO(int ret),

	TP_ARGS(ret)
);

#endif /* _TRACE_NET_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2021 pureLiFi */ #include <linux/kernel.h> #include <linux/errno.h> #include "chip.h" #include "mac.h" #include "usb.h" void plfxlc_chip_init(struct plfxlc_chip *chip, struct ieee80211_hw *hw, struct usb_interface *intf) { memset(chip, 0, sizeof(*chip)); mutex_init(&chip->mutex); plfxlc_usb_init(&chip->usb, hw, intf); } void plfxlc_chip_release(struct plfxlc_chip *chip) { plfxlc_usb_release(&chip->usb); mutex_destroy(&chip->mutex); } int plfxlc_set_beacon_interval(struct plfxlc_chip *chip, u16 interval, u8 dtim_period, int type) { if (!interval || (chip->beacon_set && chip->beacon_interval == interval)) return 0; chip->beacon_interval = interval; chip->beacon_set = true; return plfxlc_usb_wreq(chip->usb.ez_usb, &chip->beacon_interval, sizeof(chip->beacon_interval), USB_REQ_BEACON_INTERVAL_WR); } int plfxlc_chip_init_hw(struct plfxlc_chip *chip) { unsigned char *addr = plfxlc_mac_get_perm_addr(plfxlc_chip_to_mac(chip)); struct usb_device *udev = interface_to_usbdev(chip->usb.intf); pr_info("plfxlc chip %04x:%04x v%02x %pM %s\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), le16_to_cpu(udev->descriptor.bcdDevice), addr, plfxlc_speed(udev->speed)); return plfxlc_set_beacon_interval(chip, 100, 0, 0); } int plfxlc_chip_switch_radio(struct plfxlc_chip *chip, u16 value) { int r; __le16 radio_on = cpu_to_le16(value); r = plfxlc_usb_wreq(chip->usb.ez_usb, &radio_on, sizeof(value), USB_REQ_POWER_WR); if (r) dev_err(plfxlc_chip_dev(chip), "POWER_WR failed (%d)\n", r); return r; } int plfxlc_chip_enable_rxtx(struct plfxlc_chip *chip) { plfxlc_usb_enable_tx(&chip->usb); return 
plfxlc_usb_enable_rx(&chip->usb); } void plfxlc_chip_disable_rxtx(struct plfxlc_chip *chip) { u8 value = 0; plfxlc_usb_wreq(chip->usb.ez_usb, &value, sizeof(value), USB_REQ_RXTX_WR); plfxlc_usb_disable_rx(&chip->usb); plfxlc_usb_disable_tx(&chip->usb); } int plfxlc_chip_set_rate(struct plfxlc_chip *chip, u8 rate) { int r; if (!chip) return -EINVAL; r = plfxlc_usb_wreq(chip->usb.ez_usb, &rate, sizeof(rate), USB_REQ_RATE_WR); if (r) dev_err(plfxlc_chip_dev(chip), "RATE_WR failed (%d)\n", r); return r; }
103 104 115 1 1 5 1 1 1 1 1 1 1 1 1 5 5 5 554 451 115 39 1 4 9 21 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 // SPDX-License-Identifier: GPL-2.0-or-later /* * G8BPQ compatible "AX.25 via ethernet" driver release 004 * * This code REQUIRES 2.0.0 or higher/ NET3.029 * * This is a "pseudo" network driver to allow AX.25 over Ethernet * using G8BPQ encapsulation. It has been extracted from the protocol * implementation because * * - things got unreadable within the protocol stack * - to cure the protocol stack from "feature-ism" * - a protocol implementation shouldn't need to know on * which hardware it is running * - user-level programs like the AX.25 utilities shouldn't * need to know about the hardware. * - IP over ethernet encapsulated AX.25 was impossible * - rxecho.c did not work * - to have room for extensions * - it just deserves to "live" as an own driver * * This driver can use any ethernet destination address, and can be * limited to accept frames from one dedicated ethernet card only. * * Note that the driver sets up the BPQ devices automagically on * startup or (if started before the "insmod" of an ethernet device) * on "ifconfig up". It hopefully will remove the BPQ on "rmmod"ing * the ethernet device (in fact: as soon as another ethernet or bpq * device gets "ifconfig"ured). * * I have heard that several people are thinking of experiments * with highspeed packet radio using existing ethernet cards. * Well, this driver is prepared for this purpose, just add * your tx key control and a txdelay / tailtime algorithm, * probably some buffering, and /voila/... * * History * BPQ 001 Joerg(DL1BKE) Extracted BPQ code from AX.25 * protocol stack and added my own * yet existing patches * BPQ 002 Joerg(DL1BKE) Scan network device list on * startup. 
* BPQ 003 Joerg(DL1BKE) Ethernet destination address * and accepted source address * can be configured by an ioctl() * call. * Fixed to match Linux networking * changes - 2.1.15. * BPQ 004 Joerg(DL1BKE) Fixed to not lock up on ifconfig. */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <net/ip.h> #include <net/arp.h> #include <net/netdev_lock.h> #include <net/net_namespace.h> #include <linux/bpqether.h> static const char banner[] __initconst = KERN_INFO \ "AX.25: bpqether driver version 004\n"; static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static struct packet_type bpq_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_BPQ), .func = bpq_rcv, }; static struct notifier_block bpq_dev_notifier = { .notifier_call = bpq_device_event, }; struct bpqdev { struct list_head bpq_list; /* list of bpq devices chain */ struct net_device *ethdev; /* link to ethernet device */ struct net_device *axdev; /* bpq device (bpq#) */ char dest_addr[6]; /* ether destination address */ char acpt_addr[6]; /* accept ether frames from this address only */ }; static LIST_HEAD(bpq_devices); /* ------------------------------------------------------------------------ */ /* * Get the ethernet device for a BPQ device */ static inline struct net_device 
*bpq_get_ether_dev(struct net_device *dev) { struct bpqdev *bpq = netdev_priv(dev); return bpq ? bpq->ethdev : NULL; } /* * Get the BPQ device for the ethernet device */ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) { struct bpqdev *bpq; list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list, lockdep_rtnl_is_held()) { if (bpq->ethdev == dev) return bpq->axdev; } return NULL; } static inline int dev_is_ethdev(struct net_device *dev) { return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ /* * Receive an AX.25 frame via an ethernet interface. */ static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; struct ethhdr *eth; struct bpqdev *bpq; if (!net_eq(dev_net(dev), &init_net)) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct ethhdr))) goto drop; rcu_read_lock(); dev = bpq_get_ax25_dev(dev); if (dev == NULL || !netif_running(dev)) goto drop_unlock; /* * if we want to accept frames from just one ethernet device * we check the source address of the sender. 
*/ bpq = netdev_priv(dev); eth = eth_hdr(skb); if (!(bpq->acpt_addr[0] & 0x01) && !ether_addr_equal(eth->h_source, bpq->acpt_addr)) goto drop_unlock; if (skb_cow(skb, sizeof(struct ethhdr))) goto drop_unlock; len = skb->data[0] + skb->data[1] * 256 - 5; skb_pull(skb, 2); /* Remove the length bytes */ skb_trim(skb, len); /* Set the length of the data */ dev->stats.rx_packets++; dev->stats.rx_bytes += len; ptr = skb_push(skb, 1); *ptr = 0; skb->protocol = ax25_type_trans(skb, dev); netif_rx(skb); unlock: rcu_read_unlock(); return 0; drop_unlock: kfree_skb(skb); goto unlock; drop: kfree_skb(skb); return 0; } /* * Send an AX.25 frame via an ethernet interface */ static netdev_tx_t bpq_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned char *ptr; struct bpqdev *bpq; struct net_device *orig_dev; int size; if (skb->protocol == htons(ETH_P_IP)) return ax25_ip_xmit(skb); /* * Just to be *really* sure not to send anything if the interface * is down, the ethernet device may have gone. */ if (!netif_running(dev)) { kfree_skb(skb); return NETDEV_TX_OK; } skb_pull(skb, 1); /* Drop KISS byte */ size = skb->len; /* * We're about to mess with the skb which may still shared with the * generic networking code so unshare and ensure it's got enough * space for the BPQ headers. 
*/ if (skb_cow(skb, AX25_BPQ_HEADER_LEN)) { if (net_ratelimit()) pr_err("bpqether: out of memory\n"); kfree_skb(skb); return NETDEV_TX_OK; } ptr = skb_push(skb, 2); /* Make space for length */ *ptr++ = (size + 5) % 256; *ptr++ = (size + 5) / 256; bpq = netdev_priv(dev); orig_dev = dev; if ((dev = bpq_get_ether_dev(dev)) == NULL) { orig_dev->stats.tx_dropped++; kfree_skb(skb); return NETDEV_TX_OK; } skb->protocol = ax25_type_trans(skb, dev); skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0); dev->stats.tx_packets++; dev->stats.tx_bytes+=skb->len; dev_queue_xmit(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } /* * Set AX.25 callsign */ static int bpq_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = (struct sockaddr *)addr; dev_addr_set(dev, sa->sa_data); return 0; } /* Ioctl commands * * SIOCSBPQETHOPT reserved for enhancements * SIOCSBPQETHADDR set the destination and accepted * source ethernet address (broadcast * or multicast: accept all) */ static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { struct bpq_ethaddr __user *ethaddr = data; struct bpqdev *bpq = netdev_priv(dev); struct bpq_req req; if (!capable(CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case SIOCSBPQETHOPT: if (copy_from_user(&req, data, sizeof(struct bpq_req))) return -EFAULT; switch (req.cmd) { case SIOCGBPQETHPARAM: case SIOCSBPQETHPARAM: default: return -EINVAL; } break; case SIOCSBPQETHADDR: if (copy_from_user(bpq->dest_addr, ethaddr->destination, ETH_ALEN)) return -EFAULT; if (copy_from_user(bpq->acpt_addr, ethaddr->accept, ETH_ALEN)) return -EFAULT; break; default: return -EINVAL; } return 0; } /* * open/close a device */ static int bpq_open(struct net_device *dev) { netif_start_queue(dev); return 0; } static int bpq_close(struct net_device *dev) { netif_stop_queue(dev); return 0; } /* ------------------------------------------------------------------------ */ #ifdef 
CONFIG_PROC_FS /* * Proc filesystem */ static void *bpq_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { int i = 1; struct bpqdev *bpqdev; rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; list_for_each_entry_rcu(bpqdev, &bpq_devices, bpq_list) { if (i == *pos) return bpqdev; } return NULL; } static void *bpq_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct list_head *p; struct bpqdev *bpqdev = v; ++*pos; if (v == SEQ_START_TOKEN) p = rcu_dereference(list_next_rcu(&bpq_devices)); else p = rcu_dereference(list_next_rcu(&bpqdev->bpq_list)); return (p == &bpq_devices) ? NULL : list_entry(p, struct bpqdev, bpq_list); } static void bpq_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int bpq_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "dev ether destination accept from\n"); else { const struct bpqdev *bpqdev = v; seq_printf(seq, "%-5s %-10s %pM ", bpqdev->axdev->name, bpqdev->ethdev->name, bpqdev->dest_addr); if (is_multicast_ether_addr(bpqdev->acpt_addr)) seq_printf(seq, "*\n"); else seq_printf(seq, "%pM\n", bpqdev->acpt_addr); } return 0; } static const struct seq_operations bpq_seqops = { .start = bpq_seq_start, .next = bpq_seq_next, .stop = bpq_seq_stop, .show = bpq_seq_show, }; #endif /* ------------------------------------------------------------------------ */ static const struct net_device_ops bpq_netdev_ops = { .ndo_open = bpq_open, .ndo_stop = bpq_close, .ndo_start_xmit = bpq_xmit, .ndo_set_mac_address = bpq_set_mac_address, .ndo_siocdevprivate = bpq_siocdevprivate, }; static void bpq_setup(struct net_device *dev) { netdev_lockdep_set_classes(dev); dev->netdev_ops = &bpq_netdev_ops; dev->needs_free_netdev = true; dev->flags = 0; dev->lltx = true; /* Allow recursion */ #if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; #endif dev->type = ARPHRD_AX25; dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN; dev->mtu = AX25_DEF_PACLEN; 
dev->addr_len = AX25_ADDR_LEN; memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); dev_addr_set(dev, (u8 *)&ax25_defaddr); } /* * Setup a new device. */ static int bpq_new_device(struct net_device *edev) { int err; struct net_device *ndev; struct bpqdev *bpq; ndev = alloc_netdev(sizeof(struct bpqdev), "bpq%d", NET_NAME_UNKNOWN, bpq_setup); if (!ndev) return -ENOMEM; bpq = netdev_priv(ndev); dev_hold(edev); bpq->ethdev = edev; bpq->axdev = ndev; eth_broadcast_addr(bpq->dest_addr); eth_broadcast_addr(bpq->acpt_addr); err = register_netdevice(ndev); if (err) goto error; /* List protected by RTNL */ list_add_rcu(&bpq->bpq_list, &bpq_devices); return 0; error: dev_put(edev); free_netdev(ndev); return err; } static void bpq_free_device(struct net_device *ndev) { struct bpqdev *bpq = netdev_priv(ndev); dev_put(bpq->ethdev); list_del_rcu(&bpq->bpq_list); unregister_netdevice(ndev); } /* * Handle device status changes. */ static int bpq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: /* new ethernet device -> new BPQ interface */ if (bpq_get_ax25_dev(dev) == NULL) bpq_new_device(dev); break; case NETDEV_DOWN: /* ethernet device closed -> close BPQ interface */ if ((dev = bpq_get_ax25_dev(dev)) != NULL) dev_close(dev); break; case NETDEV_UNREGISTER: /* ethernet device removed -> free BPQ interface */ if ((dev = bpq_get_ax25_dev(dev)) != NULL) bpq_free_device(dev); break; default: break; } return NOTIFY_DONE; } /* ------------------------------------------------------------------------ */ /* * Initialize driver. 
To be called from af_ax25 if not compiled as a * module */ static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return -ENOENT; } #endif /* CONFIG_PROC_FS */ dev_add_pack(&bpq_packet_type); register_netdevice_notifier(&bpq_dev_notifier); printk(banner); return 0; } static void __exit bpq_cleanup_driver(void) { struct bpqdev *bpq; dev_remove_pack(&bpq_packet_type); unregister_netdevice_notifier(&bpq_dev_notifier); remove_proc_entry("bpqether", init_net.proc_net); rtnl_lock(); while (!list_empty(&bpq_devices)) { bpq = list_entry(bpq_devices.next, struct bpqdev, bpq_list); bpq_free_device(bpq->axdev); } rtnl_unlock(); } MODULE_AUTHOR("Joerg Reuter DL1BKE <jreuter@yaina.de>"); MODULE_DESCRIPTION("Transmit and receive AX.25 packets over Ethernet"); MODULE_LICENSE("GPL"); module_init(bpq_init_driver); module_exit(bpq_cleanup_driver);
2 1 1 4 3 1 7 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 
1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 // SPDX-License-Identifier: GPL-2.0 /* * Zoned block device handling * * Copyright (c) 2015, Hannes Reinecke * Copyright (c) 2015, SUSE Linux GmbH * * Copyright (c) 2016, Damien Le Moal * Copyright (c) 2016, Western Digital * Copyright (c) 2024, Western Digital Corporation or its affiliates. */ #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/mempool.h> #include <trace/events/block.h> #include "blk.h" #include "blk-mq-sched.h" #include "blk-mq-debugfs.h" #define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name static const char *const zone_cond_name[] = { ZONE_COND_NAME(NOT_WP), ZONE_COND_NAME(EMPTY), ZONE_COND_NAME(IMP_OPEN), ZONE_COND_NAME(EXP_OPEN), ZONE_COND_NAME(CLOSED), ZONE_COND_NAME(READONLY), ZONE_COND_NAME(FULL), ZONE_COND_NAME(OFFLINE), }; #undef ZONE_COND_NAME /* * Per-zone write plug. * @node: hlist_node structure for managing the plug using a hash table. * @ref: Zone write plug reference counter. A zone write plug reference is * always at least 1 when the plug is hashed in the disk plug hash table. * The reference is incremented whenever a new BIO needing plugging is * submitted and when a function needs to manipulate a plug. The * reference count is decremented whenever a plugged BIO completes and * when a function that referenced the plug returns. The initial * reference is dropped whenever the zone of the zone write plug is reset, * finished and when the zone becomes full (last write BIO to the zone * completes). * @lock: Spinlock to atomically manipulate the plug. 
* @flags: Flags indicating the plug state.
 * @zone_no: The number of the zone the plug is managing.
 * @wp_offset: The zone write pointer location relative to the start of the zone
 *             as a number of 512B sectors.
 * @bio_list: The list of BIOs that are currently plugged.
 * @bio_work: Work struct to handle issuing of plugged BIOs
 * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
 * @disk: The gendisk the plug belongs to.
 */
struct blk_zone_wplug {
	struct hlist_node	node;		/* Link in the disk plug hash table */
	refcount_t		ref;		/* Plug reference counter */
	spinlock_t		lock;		/* Taken to manipulate the plug */
	unsigned int		flags;		/* BLK_ZONE_WPLUG_* bits */
	unsigned int		zone_no;	/* Number of the managed zone */
	unsigned int		wp_offset;	/* In 512B sectors from zone start */
	struct bio_list		bio_list;	/* Currently plugged BIOs */
	struct work_struct	bio_work;	/* Issues the plugged BIOs */
	struct rcu_head		rcu_head;	/* For freeing after a grace period */
	struct gendisk		*disk;		/* Owning disk */
};

/*
 * Zone write plug flags bits:
 *  - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
 *    that is, that write BIOs are being throttled due to a write BIO already
 *    being executed or the zone write plug bio list is not empty.
 *  - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
 *    write pointer offset and need to update it.
 *  - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
 *    from the disk hash table and that the initial reference to the zone
 *    write plug set when the plug was first added to the hash table has been
 *    dropped. This flag is set when a zone is reset, finished or becomes full,
 *    to prevent new references to the zone write plug from being taken for
 *    newly incoming BIOs. A zone write plug flagged with this flag will be
 *    freed once all remaining references from BIOs or functions are dropped.
 */
#define BLK_ZONE_WPLUG_PLUGGED		(1U << 0)
#define BLK_ZONE_WPLUG_NEED_WP_UPDATE	(1U << 1)
#define BLK_ZONE_WPLUG_UNHASHED		(1U << 2)

/**
 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
 * @zone_cond: BLK_ZONE_COND_XXX.
 *
 * Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
 * into string format. Useful in the debugging and tracing zone conditions.
For * invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN". */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) { static const char *zone_cond_str = "UNKNOWN"; if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond]) zone_cond_str = zone_cond_name[zone_cond]; return zone_cond_str; } EXPORT_SYMBOL_GPL(blk_zone_cond_str); struct disk_report_zones_cb_args { struct gendisk *disk; report_zones_cb user_cb; void *user_data; }; static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, struct blk_zone *zone); static int disk_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct disk_report_zones_cb_args *args = data; struct gendisk *disk = args->disk; if (disk->zone_wplugs_hash) disk_zone_wplug_sync_wp_offset(disk, zone); if (!args->user_cb) return 0; return args->user_cb(zone, idx, args->user_data); } /** * blkdev_report_zones - Get zones information * @bdev: Target block device * @sector: Sector from which to report zones * @nr_zones: Maximum number of zones to report * @cb: Callback function called for each reported zone * @data: Private data for the callback * * Description: * Get zone information starting from the zone containing @sector for at most * @nr_zones, and call @cb for each zone reported by the device. * To report all zones in a device starting from @sector, the BLK_ALL_ZONES * constant can be passed to @nr_zones. * Returns the number of zones reported by the device, or a negative errno * value in case of failure. * * Note: The caller must use memalloc_noXX_save/restore() calls to control * memory allocations done within this function. 
*/ int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); struct disk_report_zones_cb_args args = { .disk = disk, .user_cb = cb, .user_data = data, }; if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; if (!nr_zones || sector >= capacity) return 0; return disk->fops->report_zones(disk, sector, nr_zones, disk_report_zones_cb, &args); } EXPORT_SYMBOL_GPL(blkdev_report_zones); static int blkdev_zone_reset_all(struct block_device *bdev) { struct bio bio; bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC); trace_blkdev_zone_mgmt(&bio, 0); return submit_bio_wait(&bio); } /** * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bdev: Target block device * @op: Operation to be performed on the zones * @sector: Start sector of the first zone to operate on * @nr_sectors: Number of sectors, should be at least the length of one zone and * must be zone size aligned. * * Description: * Perform the specified operation on the range of zones specified by * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. * The operation to execute on each zone can be a zone reset, open, close * or finish request. 
*/ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sector, sector_t nr_sectors) { sector_t zone_sectors = bdev_zone_sectors(bdev); sector_t capacity = bdev_nr_sectors(bdev); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; int ret = 0; if (!bdev_is_zoned(bdev)) return -EOPNOTSUPP; if (bdev_read_only(bdev)) return -EPERM; if (!op_is_zone_mgmt(op)) return -EOPNOTSUPP; if (end_sector <= sector || end_sector > capacity) /* Out of range */ return -EINVAL; /* Check alignment (handle eventual smaller last zone) */ if (!bdev_is_zone_start(bdev, sector)) return -EINVAL; if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity) return -EINVAL; /* * In the case of a zone reset operation over all zones, use * REQ_OP_ZONE_RESET_ALL. */ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) return blkdev_zone_reset_all(bdev); while (sector < end_sector) { bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL); bio->bi_iter.bi_sector = sector; sector += zone_sectors; /* This may take a while, so be nice to others */ cond_resched(); } trace_blkdev_zone_mgmt(bio, nr_sectors); ret = submit_bio_wait(bio); bio_put(bio); return ret; } EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); struct zone_report_args { struct blk_zone __user *zones; }; static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, void *data) { struct zone_report_args *args = data; if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) return -EFAULT; return 0; } /* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. 
*/
int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
		unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct blk_zone_report rep;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	/* Fetch the report header: start sector and number of zones wanted. */
	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	/* The zone descriptors are copied right after the report header. */
	args.zones = argp + sizeof(struct blk_zone_report);
	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
				  blkdev_copy_zone_to_user, &args);
	if (ret < 0)
		return ret;

	/* Write the header back with the number of zones actually reported. */
	rep.nr_zones = ret;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
		return -EFAULT;
	return 0;
}

/*
 * Check that the sector range of @zrange does not wrap and ends within the
 * disk capacity, then call truncate_bdev_range() on the equivalent inclusive
 * byte range. Returns -EINVAL for an invalid range and the result of
 * truncate_bdev_range() otherwise.
 */
static int blkdev_truncate_zone_range(struct block_device *bdev,
		blk_mode_t mode, const struct blk_zone_range *zrange)
{
	loff_t start, end;

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		/* Out of range */
		return -EINVAL;

	/* Convert the sector range to a byte range; @end is inclusive. */
	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, start, end);
}

/*
 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct blk_zone_range zrange;
	enum req_op op;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	/* Zone management operations require the device opened for writing. */
	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	/* Map the ioctl command to the zone management operation to issue. */
	switch (cmd) {
	case BLKRESETZONE:
		op = REQ_OP_ZONE_RESET;

		/* Invalidate the page cache, including dirty pages.
*/
		inode_lock(bdev->bd_mapping->host);
		filemap_invalidate_lock(bdev->bd_mapping);
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
		if (ret)
			goto fail;
		break;
	case BLKOPENZONE:
		op = REQ_OP_ZONE_OPEN;
		break;
	case BLKCLOSEZONE:
		op = REQ_OP_ZONE_CLOSE;
		break;
	case BLKFINISHZONE:
		op = REQ_OP_ZONE_FINISH;
		break;
	default:
		return -ENOTTY;
	}

	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);

fail:
	/*
	 * Only the BLKRESETZONE case took the locks: release them in the
	 * reverse order of their acquisition.
	 */
	if (cmd == BLKRESETZONE) {
		filemap_invalidate_unlock(bdev->bd_mapping);
		inode_unlock(bdev->bd_mapping->host);
	}

	return ret;
}

/* Return true if @zone ends at or beyond the capacity of @disk. */
static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
{
	return zone->start + zone->len >= get_capacity(disk);
}

/*
 * Return true if @offset_in_zone is at or beyond the capacity of zone @zno.
 * The last zone of the disk is checked against disk->last_zone_capacity and
 * all other zones against disk->zone_capacity.
 */
static bool disk_zone_is_full(struct gendisk *disk,
			      unsigned int zno, unsigned int offset_in_zone)
{
	if (zno < disk->nr_zones - 1)
		return offset_in_zone >= disk->zone_capacity;
	return offset_in_zone >= disk->last_zone_capacity;
}

/* Return true if the write pointer offset of @zwplug makes its zone full. */
static bool disk_zone_wplug_is_full(struct gendisk *disk,
				    struct blk_zone_wplug *zwplug)
{
	return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
}

/*
 * Add @zwplug to the zone write plug hash table of @disk, under
 * disk->zone_wplugs_lock. Returns false, without inserting anything, if a
 * plug for the same zone number is already hashed, and true otherwise.
 */
static bool disk_insert_zone_wplug(struct gendisk *disk,
				   struct blk_zone_wplug *zwplug)
{
	struct blk_zone_wplug *zwplg;
	unsigned long flags;
	unsigned int idx =
		hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits);

	/*
	 * Add the new zone write plug to the hash table, but carefully as we
	 * are racing with other submission context, so we may already have a
	 * zone write plug for the same zone.
*/
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
		if (zwplg->zone_no == zwplug->zone_no) {
			/* Lost the race: this zone already has a hashed plug. */
			spin_unlock_irqrestore(&disk->zone_wplugs_lock,
					       flags);
			return false;
		}
	}
	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
	atomic_inc(&disk->nr_zone_wplugs);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);

	return true;
}

/*
 * Look up, under rcu_read_lock(), the hashed zone write plug of the zone
 * containing @sector. A plug is returned only if a reference could be taken
 * on it with refcount_inc_not_zero(). Returns NULL if no such plug is found.
 */
static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
							  sector_t sector)
{
	unsigned int zno = disk_zone_no(disk, sector);
	unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
	struct blk_zone_wplug *zwplug;

	rcu_read_lock();

	hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) {
		if (zwplug->zone_no == zno &&
		    refcount_inc_not_zero(&zwplug->ref)) {
			rcu_read_unlock();
			return zwplug;
		}
	}

	rcu_read_unlock();

	return NULL;
}

/*
 * Same as disk_get_hashed_zone_wplug(), but skip the hash table lookup when
 * the disk plug counter indicates that no plug is hashed.
 */
static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
							  sector_t sector)
{
	if (!atomic_read(&disk->nr_zone_wplugs))
		return NULL;

	return disk_get_hashed_zone_wplug(disk, sector);
}

/* RCU callback returning a zone write plug to the mempool of its disk. */
static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
{
	struct blk_zone_wplug *zwplug =
		container_of(rcu_head, struct blk_zone_wplug, rcu_head);

	mempool_free(zwplug, zwplug->disk->zone_wplugs_pool);
}

/*
 * Drop a reference on @zwplug. When the last reference is dropped, the plug
 * is expected to be unhashed, unplugged and without any plugged BIO (a
 * warning is issued otherwise) and is freed through call_rcu().
 */
static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
{
	if (refcount_dec_and_test(&zwplug->ref)) {
		WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
		WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
		WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));

		call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
	}
}

/*
 * Tell if @zwplug can be removed from the hash table of @disk.
 * Must be called with the zone write plug lock held.
 */
static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
						 struct blk_zone_wplug *zwplug)
{
	lockdep_assert_held(&zwplug->lock);

	/* If the zone write plug was already removed, we are done. */
	if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
		return false;

	/* If the zone write plug is still plugged, it cannot be removed.
*/
	if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
		return false;

	/*
	 * Completions of BIOs with blk_zone_write_plug_bio_endio() may
	 * happen after handling a request completion with
	 * blk_zone_write_plug_finish_request() (e.g. with split BIOs
	 * that are chained). In such case, disk_zone_wplug_unplug_bio()
	 * should not attempt to remove the zone write plug until all BIO
	 * completions are seen. Check by looking at the zone write plug
	 * reference count, which is 2 when the plug is unused (one reference
	 * taken when the plug was allocated and another reference taken by the
	 * caller context).
	 */
	if (refcount_read(&zwplug->ref) > 2)
		return false;

	/* We can remove zone write plugs for zones that are empty or full. */
	return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
}

/*
 * Remove @zwplug from the hash table of @disk and drop the reference held by
 * the hash table. This does nothing if the plug was already unhashed.
 * NOTE(review): the callers visible in this file hold zwplug->lock when
 * calling this function, but this is not asserted here.
 */
static void disk_remove_zone_wplug(struct gendisk *disk,
				   struct blk_zone_wplug *zwplug)
{
	unsigned long flags;

	/* If the zone write plug was already removed, we have nothing to do. */
	if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
		return;

	/*
	 * Mark the zone write plug as unhashed and drop the extra reference we
	 * took when the plug was inserted in the hash table.
	 */
	zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	hlist_del_init_rcu(&zwplug->node);
	atomic_dec(&disk->nr_zone_wplugs);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
	disk_put_zone_wplug(zwplug);
}

static void blk_zone_wplug_bio_work(struct work_struct *work);

/*
 * Get a reference on the write plug for the zone containing @sector.
 * If the plug does not exist, it is allocated and hashed.
 * Return a pointer to the zone write plug with the plug spinlock held.
*/
static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
					sector_t sector, gfp_t gfp_mask,
					unsigned long *flags)
{
	unsigned int zno = disk_zone_no(disk, sector);
	struct blk_zone_wplug *zwplug;

again:
	zwplug = disk_get_zone_wplug(disk, sector);
	if (zwplug) {
		/*
		 * Check that a BIO completion or a zone reset or finish
		 * operation has not already removed the zone write plug from
		 * the hash table and dropped its reference count. In such case,
		 * we need to get a new plug so start over from the beginning.
		 */
		spin_lock_irqsave(&zwplug->lock, *flags);
		if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
			spin_unlock_irqrestore(&zwplug->lock, *flags);
			disk_put_zone_wplug(zwplug);
			goto again;
		}
		return zwplug;
	}

	/*
	 * Allocate and initialize a zone write plug with an extra reference
	 * so that it is not freed when the zone write plug becomes idle without
	 * the zone being full.
	 */
	zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
	if (!zwplug)
		/* Allocation failure is reported to the caller as NULL. */
		return NULL;

	INIT_HLIST_NODE(&zwplug->node);
	/* One reference for the hash table and one for the caller. */
	refcount_set(&zwplug->ref, 2);
	spin_lock_init(&zwplug->lock);
	zwplug->flags = 0;
	zwplug->zone_no = zno;
	/* The initial write pointer offset is the offset of @sector in its zone. */
	zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
	bio_list_init(&zwplug->bio_list);
	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
	zwplug->disk = disk;

	/* Lock the plug before hashing it: it is returned locked on success. */
	spin_lock_irqsave(&zwplug->lock, *flags);

	/*
	 * Insert the new zone write plug in the hash table. This can fail only
	 * if another context already inserted a plug. Retry from the beginning
	 * in such case.
*/
	if (!disk_insert_zone_wplug(disk, zwplug)) {
		/* The plug was never visible to others: free it directly. */
		spin_unlock_irqrestore(&zwplug->lock, *flags);
		mempool_free(zwplug, disk->zone_wplugs_pool);
		goto again;
	}

	return zwplug;
}

/*
 * Fail a plugged BIO: clear its zone write plugging flag, complete it with an
 * I/O error and drop both the zone write plug reference and the request queue
 * usage reference associated with the BIO.
 */
static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
					       struct bio *bio)
{
	/*
	 * Read the queue pointer before dropping the plug reference.
	 * NOTE(review): presumably because the put below may be the last
	 * reference on the plug - confirm.
	 */
	struct request_queue *q = zwplug->disk->queue;

	bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
	bio_io_error(bio);
	disk_put_zone_wplug(zwplug);
	/* Drop the reference taken by disk_zone_wplug_add_bio(). */
	blk_queue_exit(q);
}

/*
 * Abort (fail) all plugged BIOs of a zone write plug.
 */
static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
{
	struct bio *bio;

	/* Nothing plugged: return without emitting the warning below. */
	if (bio_list_empty(&zwplug->bio_list))
		return;

	pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
			    zwplug->disk->disk_name, zwplug->zone_no);
	while ((bio = bio_list_pop(&zwplug->bio_list)))
		blk_zone_wplug_bio_io_error(zwplug, bio);
}

/*
 * Set a zone write plug write pointer offset to the specified value.
 * This aborts all plugged BIOs, which is fine as this function is called for
 * a zone reset operation, a zone finish operation or if the zone needs a wp
 * update from a report zone after a write error.
 */
static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
					  struct blk_zone_wplug *zwplug,
					  unsigned int wp_offset)
{
	lockdep_assert_held(&zwplug->lock);

	/* Update the zone write pointer and abort all plugged BIOs. */
	zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE;
	zwplug->wp_offset = wp_offset;
	disk_zone_wplug_abort(zwplug);

	/*
	 * The zone write plug now has no BIO plugged: remove it from the
	 * hash table so that it cannot be seen. The plug will be freed
	 * when the last reference is dropped.
*/
	if (disk_should_remove_zone_wplug(disk, zwplug))
		disk_remove_zone_wplug(disk, zwplug);
}

/*
 * Return the write pointer offset of @zone relative to the zone start, as
 * implied by the zone condition: 0 for an empty zone, the zone length for a
 * full zone and UINT_MAX for zones without a valid write pointer.
 */
static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		return zone->wp - zone->start;
	case BLK_ZONE_COND_FULL:
		return zone->len;
	case BLK_ZONE_COND_EMPTY:
		return 0;
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
	default:
		/*
		 * Conventional, offline and read-only zones do not have a valid
		 * write pointer.
		 */
		return UINT_MAX;
	}
}

/*
 * If the zone write plug of the reported @zone exists and is flagged as
 * needing a write pointer update, set its write pointer offset from the
 * zone report.
 */
static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
					   struct blk_zone *zone)
{
	struct blk_zone_wplug *zwplug;
	unsigned long flags;

	zwplug = disk_get_zone_wplug(disk, zone->start);
	if (!zwplug)
		return;

	spin_lock_irqsave(&zwplug->lock, flags);
	if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
		disk_zone_wplug_set_wp_offset(disk, zwplug,
					      blk_zone_wp_offset(zone));
	spin_unlock_irqrestore(&zwplug->lock, flags);

	/* Drop the reference taken by the lookup above. */
	disk_put_zone_wplug(zwplug);
}

/*
 * Execute a report zones for the single zone at @sector, with
 * disk_report_zones_cb() as the callback and no user callback. Returns the
 * result of the report_zones method of the disk.
 */
static int disk_zone_sync_wp_offset(struct gendisk *disk, sector_t sector)
{
	struct disk_report_zones_cb_args args = {
		.disk = disk,
	};

	return disk->fops->report_zones(disk, sector, 1,
					disk_report_zones_cb, &args);
}

/*
 * Handle a zone reset or zone finish BIO: set the write pointer offset of the
 * zone write plug of the target zone, if there is one, to @wp_offset.
 * Returns true if the BIO was failed here and false if the BIO must still be
 * executed by the caller.
 */
static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
						  unsigned int wp_offset)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	sector_t sector = bio->bi_iter.bi_sector;
	struct blk_zone_wplug *zwplug;
	unsigned long flags;

	/* Conventional zones cannot be reset nor finished. */
	if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
		bio_io_error(bio);
		return true;
	}

	/*
	 * No-wait reset or finish BIOs do not make much sense as the callers
	 * issue these as blocking operations in most cases. To avoid issues
	 * the BIO execution potentially failing with BLK_STS_AGAIN, warn about
	 * REQ_NOWAIT being set and ignore that flag.
*/
	if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT))
		bio->bi_opf &= ~REQ_NOWAIT;

	/*
	 * If we have a zone write plug, set its write pointer offset to 0
	 * (reset case) or to the zone size (finish case). This will abort all
	 * BIOs plugged for the target zone. It is fine as resetting or
	 * finishing zones while writes are still in-flight will result in the
	 * writes failing anyway.
	 */
	zwplug = disk_get_zone_wplug(disk, sector);
	if (zwplug) {
		spin_lock_irqsave(&zwplug->lock, flags);
		disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
		spin_unlock_irqrestore(&zwplug->lock, flags);
		/* Drop the reference taken by the lookup above. */
		disk_put_zone_wplug(zwplug);
	}

	return false;
}

/*
 * Handle a zone reset all BIO: set the write pointer offset of all existing
 * zone write plugs of the disk to 0. Always returns false.
 */
static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct blk_zone_wplug *zwplug;
	unsigned long flags;
	sector_t sector;

	/*
	 * Set the write pointer offset of all zone write plugs to 0. This will
	 * abort all plugged BIOs. It is fine as resetting zones while writes
	 * are still in-flight will result in the writes failing anyway.
	 */
	/* Visit each zone start, stepping by the queue limit chunk_sectors. */
	for (sector = 0; sector < get_capacity(disk);
	     sector += disk->queue->limits.chunk_sectors) {
		zwplug = disk_get_zone_wplug(disk, sector);
		if (zwplug) {
			spin_lock_irqsave(&zwplug->lock, flags);
			disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
			spin_unlock_irqrestore(&zwplug->lock, flags);
			disk_put_zone_wplug(zwplug);
		}
	}

	return false;
}

/*
 * Queue the BIO work of @zwplug on the zone write plug workqueue of @disk.
 * The plug is expected to be flagged as plugged (a warning is issued
 * otherwise).
 */
static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
					      struct blk_zone_wplug *zwplug)
{
	/*
	 * Take a reference on the zone write plug and schedule the submission
	 * of the next plugged BIO. blk_zone_wplug_bio_work() will release the
	 * reference we take here.
	 */
	WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
	refcount_inc(&zwplug->ref);
	queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
}

/*
 * Add @bio at the tail of the BIO list of @zwplug and flag the plug as
 * plugged. @nr_segs is saved in the BIO for later use when the BIO is issued.
 */
static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
				struct blk_zone_wplug *zwplug,
				struct bio *bio, unsigned int nr_segs)
{
	bool schedule_bio_work = false;

	/*
	 * Grab an extra reference on the BIO request queue usage counter.
* This reference will be reused to submit a request for the BIO for
	 * blk-mq devices and dropped when the BIO is failed and after
	 * it is issued in the case of BIO-based devices.
	 */
	percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter);

	/*
	 * The BIO is being plugged and thus will have to wait for the on-going
	 * write and for all other writes already plugged. So polling makes
	 * no sense.
	 */
	bio_clear_polled(bio);

	/*
	 * REQ_NOWAIT BIOs are always handled using the zone write plug BIO
	 * work, which can block. So clear the REQ_NOWAIT flag and schedule the
	 * work if this is the first BIO we are plugging.
	 */
	if (bio->bi_opf & REQ_NOWAIT) {
		schedule_bio_work = !(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
		bio->bi_opf &= ~REQ_NOWAIT;
	}

	/*
	 * Reuse the poll cookie field to store the number of segments when
	 * split to the hardware limits.
	 */
	bio->__bi_nr_segments = nr_segs;

	/*
	 * We always receive BIOs after they are split and ready to be issued.
	 * The block layer passes the parts of a split BIO in order, and the
	 * user must also issue write sequentially. So simply add the new BIO
	 * at the tail of the list to preserve the sequential write order.
	 */
	bio_list_add(&zwplug->bio_list, bio);
	trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
				      bio->bi_iter.bi_sector, bio_sectors(bio));

	/*
	 * Flag the plug as plugged before scheduling the BIO work:
	 * disk_zone_wplug_schedule_bio_work() warns if the flag is not set.
	 */
	zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;

	if (schedule_bio_work)
		disk_zone_wplug_schedule_bio_work(disk, zwplug);
}

/*
 * Called from bio_attempt_back_merge() when a BIO was merged with a request.
 */
void blk_zone_write_plug_bio_merged(struct bio *bio)
{
	struct blk_zone_wplug *zwplug;
	unsigned long flags;

	/*
	 * If the BIO was already plugged, then we were called through
	 * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge().
	 * For this case, we already hold a reference on the zone write plug for
	 * the BIO and blk_zone_write_plug_init_request() will handle the
	 * zone write pointer offset update.
*/
	if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
		return;

	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);

	/*
	 * Get a reference on the zone write plug of the target zone and advance
	 * the zone write pointer offset. Given that this is a merge, we already
	 * have at least one request and one BIO referencing the zone write
	 * plug. So this should not fail.
	 */
	zwplug = disk_get_zone_wplug(bio->bi_bdev->bd_disk,
				     bio->bi_iter.bi_sector);
	if (WARN_ON_ONCE(!zwplug))
		return;

	/*
	 * The plug reference taken above is not dropped in this function.
	 * NOTE(review): presumably it is released when the BIO completes -
	 * confirm against the BIO completion path.
	 */
	spin_lock_irqsave(&zwplug->lock, flags);
	zwplug->wp_offset += bio_sectors(bio);
	spin_unlock_irqrestore(&zwplug->lock, flags);
}

/*
 * Attempt to merge plugged BIOs with a newly prepared request for a BIO that
 * already went through zone write plugging (either a new BIO or one that was
 * unplugged).
 */
void blk_zone_write_plug_init_request(struct request *req)
{
	/* First sector following the end of the request. */
	sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
	struct request_queue *q = req->q;
	struct gendisk *disk = q->disk;
	struct blk_zone_wplug *zwplug =
		disk_get_zone_wplug(disk, blk_rq_pos(req));
	unsigned long flags;
	struct bio *bio;

	if (WARN_ON_ONCE(!zwplug))
		return;

	/*
	 * Indicate that completion of this request needs to be handled with
	 * blk_zone_write_plug_finish_request(), which will drop the reference
	 * on the zone write plug we took above on entry to this function.
	 */
	req->rq_flags |= RQF_ZONE_WRITE_PLUGGING;

	/* Merging is disabled for this queue: leave the plugged BIOs alone. */
	if (blk_queue_nomerges(q))
		return;

	/*
	 * Walk through the list of plugged BIOs to check if they can be merged
	 * into the back of the request.
*/
	spin_lock_irqsave(&zwplug->lock, flags);
	while (!disk_zone_wplug_is_full(disk, zwplug)) {
		bio = bio_list_peek(&zwplug->bio_list);
		if (!bio)
			break;

		/* Only a BIO starting right at the request end can be merged. */
		if (bio->bi_iter.bi_sector != req_back_sector ||
		    !blk_rq_merge_ok(req, bio))
			break;

		WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE_ZEROES &&
			     !bio->__bi_nr_segments);

		bio_list_pop(&zwplug->bio_list);
		if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) !=
		    BIO_MERGE_OK) {
			/* Merge failed: put the BIO back at the list head. */
			bio_list_add_head(&zwplug->bio_list, bio);
			break;
		}

		/* Drop the reference taken by disk_zone_wplug_add_bio(). */
		blk_queue_exit(q);
		zwplug->wp_offset += bio_sectors(bio);

		req_back_sector += bio_sectors(bio);
	}
	spin_unlock_irqrestore(&zwplug->lock, flags);
}

/*
 * Check and prepare a BIO for submission by incrementing the write pointer
 * offset of its zone write plug and changing zone append operations into
 * regular write when zone append emulation is needed.
 */
static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
				       struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;

	lockdep_assert_held(&zwplug->lock);

	/*
	 * If we lost track of the zone write pointer due to a write error,
	 * the user must either execute a report zones, reset the zone or finish
	 * the zone to recover a reliable write pointer position. Fail BIOs if
	 * the user did not do that as we cannot handle emulated zone append
	 * otherwise.
	 */
	if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
		return false;

	/*
	 * Check that the user is not attempting to write to a full zone.
	 * We know such BIO will fail, and that would potentially overflow our
	 * write pointer offset beyond the end of the zone.
	 */
	if (disk_zone_wplug_is_full(disk, zwplug))
		return false;

	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Use a regular write starting at the current write pointer.
		 * Similarly to native zone append operations, do not allow
		 * merging.
*/
		bio->bi_opf &= ~REQ_OP_MASK;
		bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
		bio->bi_iter.bi_sector += zwplug->wp_offset;

		/*
		 * Remember that this BIO is in fact a zone append operation
		 * so that we can restore its operation code on completion.
		 */
		bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
	} else {
		/*
		 * Check for non-sequential writes early as we know that BIOs
		 * with a start sector not aligned to the zone write pointer
		 * will fail.
		 */
		if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
			return false;
	}

	/* Advance the zone write pointer offset. */
	zwplug->wp_offset += bio_sectors(bio);

	return true;
}

/*
 * Handle a write BIO with zone write plugging. Returns true if the BIO was
 * either failed or added to the BIO list of its zone write plug, and false
 * if the caller must continue processing the BIO.
 */
static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	sector_t sector = bio->bi_iter.bi_sector;
	struct blk_zone_wplug *zwplug;
	gfp_t gfp_mask = GFP_NOIO;
	unsigned long flags;

	/*
	 * BIOs must be fully contained within a zone so that we use the correct
	 * zone write plug for the entire BIO. For blk-mq devices, the block
	 * layer should already have done any splitting required to ensure this
	 * and this BIO should thus not be straddling zone boundaries. For
	 * BIO-based devices, it is the responsibility of the driver to split
	 * the bio before submitting it.
	 */
	if (WARN_ON_ONCE(bio_straddles_zones(bio))) {
		bio_io_error(bio);
		return true;
	}

	/* Conventional zones do not need write plugging. */
	if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
		/* Zone append to conventional zones is not allowed. */
		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
			bio_io_error(bio);
			return true;
		}
		return false;
	}

	/* Use a non-blocking plug allocation for REQ_NOWAIT BIOs. */
	if (bio->bi_opf & REQ_NOWAIT)
		gfp_mask = GFP_NOWAIT;

	zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags);
	if (!zwplug) {
		/* No plug could be obtained: fail the BIO. */
		if (bio->bi_opf & REQ_NOWAIT)
			bio_wouldblock_error(bio);
		else
			bio_io_error(bio);
		return true;
	}

	/* Indicate that this BIO is being handled using zone write plugging. */
	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);

	/*
	 * If the zone is already plugged, add the BIO to the plug BIO list.
* Do the same for REQ_NOWAIT BIOs to ensure that we will not see a * BLK_STS_AGAIN failure if we let the BIO execute. * Otherwise, plug and let the BIO execute. */ if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) || (bio->bi_opf & REQ_NOWAIT)) goto plug; if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { spin_unlock_irqrestore(&zwplug->lock, flags); bio_io_error(bio); return true; } zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; spin_unlock_irqrestore(&zwplug->lock, flags); return false; plug: disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs); spin_unlock_irqrestore(&zwplug->lock, flags); return true; } static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; struct blk_zone_wplug *zwplug; unsigned long flags; /* * We have native support for zone append operations, so we are not * going to handle @bio through plugging. However, we may already have a * zone write plug for the target zone if that zone was previously * partially written using regular writes. In such case, we risk leaving * the plug in the disk hash table if the zone is fully written using * zone append operations. Avoid this by removing the zone write plug. */ zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); if (likely(!zwplug)) return; spin_lock_irqsave(&zwplug->lock, flags); /* * We are about to remove the zone write plug. But if the user * (mistakenly) has issued regular writes together with native zone * append, we must aborts the writes as otherwise the plugged BIOs would * not be executed by the plug BIO work as disk_get_zone_wplug() will * return NULL after the plug is removed. Aborting the plugged write * BIOs is consistent with the fact that these writes will most likely * fail anyway as there is no ordering guarantees between zone append * operations and regular write operations. 
*/ if (!bio_list_empty(&zwplug->bio_list)) { pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n", disk->disk_name, zwplug->zone_no); disk_zone_wplug_abort(zwplug); } disk_remove_zone_wplug(disk, zwplug); spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted * @nr_segs: The number of physical segments of @bio * * Handle write, write zeroes and zone append operations requiring emulation * using zone write plugging. * * Return true whenever @bio execution needs to be delayed through the zone * write plug. Otherwise, return false to let the submission path process * @bio normally. */ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) { struct block_device *bdev = bio->bi_bdev; if (WARN_ON_ONCE(!bdev->bd_disk->zone_wplugs_hash)) return false; /* * Regular writes and write zeroes need to be handled through the target * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH * which may need to go through the flush machinery depending on the * target device capabilities. Plugging such writes is fine as the flush * machinery operates at the request level, below the plug, and * completion of the flush sequence will go through the regular BIO * completion, which will handle zone write plugging. * Zone append operations for devices that requested emulation must * also be plugged so that these BIOs can be changed into regular * write BIOs. * Zone reset, reset all and finish commands need special treatment * to correctly track the write pointer offset of zones. These commands * are not plugged as we do not need serialization with write * operations. It is the responsibility of the user to not issue reset * and finish commands when write operations are in flight. 
*/ switch (bio_op(bio)) { case REQ_OP_ZONE_APPEND: if (!bdev_emulates_zone_append(bdev)) { blk_zone_wplug_handle_native_zone_append(bio); return false; } fallthrough; case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: return blk_zone_wplug_handle_write(bio, nr_segs); case REQ_OP_ZONE_RESET: return blk_zone_wplug_handle_reset_or_finish(bio, 0); case REQ_OP_ZONE_FINISH: return blk_zone_wplug_handle_reset_or_finish(bio, bdev_zone_sectors(bdev)); case REQ_OP_ZONE_RESET_ALL: return blk_zone_wplug_handle_reset_all(bio); default: return false; } return false; } EXPORT_SYMBOL_GPL(blk_zone_plug_bio); static void disk_zone_wplug_unplug_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug) { unsigned long flags; spin_lock_irqsave(&zwplug->lock, flags); /* Schedule submission of the next plugged BIO if we have one. */ if (!bio_list_empty(&zwplug->bio_list)) { disk_zone_wplug_schedule_bio_work(disk, zwplug); spin_unlock_irqrestore(&zwplug->lock, flags); return; } zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; /* * If the zone is full (it was fully written or finished, or empty * (it was reset), remove its zone write plug from the hash table. */ if (disk_should_remove_zone_wplug(disk, zwplug)) disk_remove_zone_wplug(disk, zwplug); spin_unlock_irqrestore(&zwplug->lock, flags); } void blk_zone_append_update_request_bio(struct request *rq, struct bio *bio) { /* * For zone append requests, the request sector indicates the location * at which the BIO data was written. Return this value to the BIO * issuer through the BIO iter sector. * For plugged zone writes, which include emulated zone append, we need * the original BIO sector so that blk_zone_write_plug_bio_endio() can * lookup the zone write plug. 
*/ bio->bi_iter.bi_sector = rq->__sector; trace_blk_zone_append_update_request_bio(rq); } void blk_zone_write_plug_bio_endio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; struct blk_zone_wplug *zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); unsigned long flags; if (WARN_ON_ONCE(!zwplug)) return; /* Make sure we do not see this BIO again by clearing the plug flag. */ bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); /* * If this is a regular write emulating a zone append operation, * restore the original operation code. */ if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_ZONE_APPEND; bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); } /* * If the BIO failed, abort all plugged BIOs and mark the plug as * needing a write pointer update. */ if (bio->bi_status != BLK_STS_OK) { spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_abort(zwplug); zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE; spin_unlock_irqrestore(&zwplug->lock, flags); } /* Drop the reference we took when the BIO was issued. */ disk_put_zone_wplug(zwplug); /* * For BIO-based devices, blk_zone_write_plug_finish_request() * is not called. So we need to schedule execution of the next * plugged BIO here. */ if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) disk_zone_wplug_unplug_bio(disk, zwplug); /* Drop the reference we took when entering this function. */ disk_put_zone_wplug(zwplug); } void blk_zone_write_plug_finish_request(struct request *req) { struct gendisk *disk = req->q->disk; struct blk_zone_wplug *zwplug; zwplug = disk_get_zone_wplug(disk, req->__sector); if (WARN_ON_ONCE(!zwplug)) return; req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING; /* * Drop the reference we took when the request was initialized in * blk_zone_write_plug_init_request(). */ disk_put_zone_wplug(zwplug); disk_zone_wplug_unplug_bio(disk, zwplug); /* Drop the reference we took when entering this function. 
*/ disk_put_zone_wplug(zwplug); } static void blk_zone_wplug_bio_work(struct work_struct *work) { struct blk_zone_wplug *zwplug = container_of(work, struct blk_zone_wplug, bio_work); struct block_device *bdev; unsigned long flags; struct bio *bio; bool prepared; /* * Submit the next plugged BIO. If we do not have any, clear * the plugged flag. */ again: spin_lock_irqsave(&zwplug->lock, flags); bio = bio_list_pop(&zwplug->bio_list); if (!bio) { zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; spin_unlock_irqrestore(&zwplug->lock, flags); goto put_zwplug; } trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no, bio->bi_iter.bi_sector, bio_sectors(bio)); prepared = blk_zone_wplug_prepare_bio(zwplug, bio); spin_unlock_irqrestore(&zwplug->lock, flags); if (!prepared) { blk_zone_wplug_bio_io_error(zwplug, bio); goto again; } bdev = bio->bi_bdev; /* * blk-mq devices will reuse the extra reference on the request queue * usage counter we took when the BIO was plugged, but the submission * path for BIO-based devices will not do that. So drop this extra * reference here. */ if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); } else { blk_mq_submit_bio(bio); } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ disk_put_zone_wplug(zwplug); } static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) { return 1U << disk->zone_wplugs_hash_bits; } void disk_init_zone_resources(struct gendisk *disk) { spin_lock_init(&disk->zone_wplugs_lock); } /* * For the size of a disk zone write plug hash table, use the size of the * zone write plug mempool, which is the maximum of the disk open zones and * active zones limits. But do not exceed 4KB (512 hlist head entries), that is, * 9 bits. For a disk that has no limits, mempool size defaults to 128. 
*/
#define BLK_ZONE_WPLUG_MAX_HASH_BITS		9
#define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE	128

/*
 * Allocate the zone write plug hash table, mempool and workqueue of @disk.
 * Returns 0 on success and -ENOMEM on failure, in which case anything
 * allocated here is released again.
 */
static int disk_alloc_zone_resources(struct gendisk *disk,
				     unsigned int pool_size)
{
	unsigned int i;

	atomic_set(&disk->nr_zone_wplugs, 0);
	disk->zone_wplugs_hash_bits =
		min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);

	disk->zone_wplugs_hash =
		kcalloc(disk_zone_wplugs_hash_size(disk),
			sizeof(struct hlist_head), GFP_KERNEL);
	if (!disk->zone_wplugs_hash)
		return -ENOMEM;

	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
		INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]);

	disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size,
						sizeof(struct blk_zone_wplug));
	if (!disk->zone_wplugs_pool)
		goto free_hash;

	disk->zone_wplugs_wq =
		alloc_workqueue("%s_zwplugs", WQ_MEM_RECLAIM | WQ_HIGHPRI,
				pool_size, disk->disk_name);
	if (!disk->zone_wplugs_wq)
		goto destroy_pool;

	return 0;

destroy_pool:
	mempool_destroy(disk->zone_wplugs_pool);
	disk->zone_wplugs_pool = NULL;
free_hash:
	kfree(disk->zone_wplugs_hash);
	disk->zone_wplugs_hash = NULL;
	disk->zone_wplugs_hash_bits = 0;
	return -ENOMEM;
}

/*
 * Remove every zone write plug from the hash table of @disk and free the
 * table itself. Does nothing if the disk has no hash table.
 */
static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
{
	struct blk_zone_wplug *zwplug;
	unsigned int i;

	if (!disk->zone_wplugs_hash)
		return;

	/* Free all the zone write plugs we have. */
	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
		while (!hlist_empty(&disk->zone_wplugs_hash[i])) {
			zwplug = hlist_entry(disk->zone_wplugs_hash[i].first,
					     struct blk_zone_wplug, node);
			/*
			 * Hold our own reference across the removal and drop
			 * it right after.
			 */
			refcount_inc(&zwplug->ref);
			disk_remove_zone_wplug(disk, zwplug);
			disk_put_zone_wplug(zwplug);
		}
	}

	WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
	kfree(disk->zone_wplugs_hash);
	disk->zone_wplugs_hash = NULL;
	disk->zone_wplugs_hash_bits = 0;
}

/*
 * Install @bitmap (which may be NULL) as the conventional zones bitmap of
 * @disk under the zone write plugs lock, and free the previous bitmap with
 * kfree_rcu_mightsleep(). Returns the number of bits set in the new bitmap
 * within the first disk->nr_zones bits, or 0 if @bitmap is NULL.
 */
static unsigned int disk_set_conv_zones_bitmap(struct gendisk *disk,
					       unsigned long *bitmap)
{
	unsigned int nr_conv_zones = 0;
	unsigned long flags;

	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	if (bitmap)
		nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones);
	/* From here on, bitmap points to the old bitmap being replaced. */
	bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap,
				     lockdep_is_held(&disk->zone_wplugs_lock));
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);

	kfree_rcu_mightsleep(bitmap);

	return nr_conv_zones;
}

/*
 * Release all zone resources of @disk (workqueue, hash table, mempool and
 * conventional zones bitmap) and reset its zone parameters. Does nothing if
 * the disk has no zone write plug mempool.
 */
void disk_free_zone_resources(struct gendisk *disk)
{
	if (!disk->zone_wplugs_pool)
		return;

	if (disk->zone_wplugs_wq) {
		destroy_workqueue(disk->zone_wplugs_wq);
		disk->zone_wplugs_wq = NULL;
	}

	disk_destroy_zone_wplugs_hash_table(disk);

	/*
	 * Wait for the zone write plugs to be RCU-freed before
	 * destroying the mempool.
	 */
	rcu_barrier();

	mempool_destroy(disk->zone_wplugs_pool);
	disk->zone_wplugs_pool = NULL;

	disk_set_conv_zones_bitmap(disk, NULL);
	disk->zone_capacity = 0;
	disk->last_zone_capacity = 0;
	disk->nr_zones = 0;
}

/* Tell if @disk needs zone write plugging resources. */
static inline bool disk_need_zone_resources(struct gendisk *disk)
{
	/*
	 * All mq zoned devices need zone resources so that the block layer
	 * can automatically handle write BIO plugging. BIO-based device drivers
	 * (e.g. DM devices) are normally responsible for handling zone write
	 * ordering and do not need zone resources, unless the driver requires
	 * zone append emulation.
	 */
	return queue_is_mq(disk->queue) ||
		queue_emulates_zone_append(disk->queue);
}

/*
 * Allocate the zone resources of @disk if it needs them and does not have a
 * zone write plug hash table yet. Returns 0 when nothing has to be done,
 * otherwise the result of disk_alloc_zone_resources().
 */
static int disk_revalidate_zone_resources(struct gendisk *disk,
					  unsigned int nr_zones)
{
	struct queue_limits *lim = &disk->queue->limits;
	unsigned int pool_size;

	if (!disk_need_zone_resources(disk))
		return 0;

	/*
	 * If the device has no limit on the maximum number of open and active
	 * zones, use BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE.
	 */
	pool_size = max(lim->max_open_zones, lim->max_active_zones);
	if (!pool_size)
		pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_zones);

	if (!disk->zone_wplugs_hash)
		return disk_alloc_zone_resources(disk, pool_size);

	return 0;
}

/*
 * State carried through the report zones callback while revalidating the
 * zones of a disk. @sector is advanced by the length of each validated zone.
 */
struct blk_revalidate_zone_args {
	struct gendisk	*disk;
	unsigned long	*conv_zones_bitmap;
	unsigned int	nr_zones;
	unsigned int	zone_capacity;
	unsigned int	last_zone_capacity;
	sector_t	sector;
};

/*
 * Update the disk zone resources information and device queue limits.
 * The disk queue is frozen when this is executed.
 */
static int disk_update_zone_resources(struct gendisk *disk,
				      struct blk_revalidate_zone_args *args)
{
	struct request_queue *q = disk->queue;
	unsigned int nr_seq_zones, nr_conv_zones;
	unsigned int pool_size;
	struct queue_limits lim;

	disk->nr_zones = args->nr_zones;
	disk->zone_capacity = args->zone_capacity;
	disk->last_zone_capacity = args->last_zone_capacity;
	nr_conv_zones =
		disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap);
	if (nr_conv_zones >= disk->nr_zones) {
		pr_warn("%s: Invalid number of conventional zones %u / %u\n",
			disk->disk_name, nr_conv_zones, disk->nr_zones);
		return -ENODEV;
	}

	lim = queue_limits_start_update(q);

	/*
	 * Some devices can advertise zone resource limits that are larger than
	 * the number of sequential zones of the zoned block device, e.g. a
	 * small ZNS namespace. For such case, assume that the zoned device has
	 * no zone resource limits.
	 */
	nr_seq_zones = disk->nr_zones - nr_conv_zones;
	if (lim.max_open_zones >= nr_seq_zones)
		lim.max_open_zones = 0;
	if (lim.max_active_zones >= nr_seq_zones)
		lim.max_active_zones = 0;

	if (!disk->zone_wplugs_pool)
		goto commit;

	/*
	 * If the device has no limit on the maximum number of open and active
	 * zones, set its max open zone limit to the mempool size to indicate
	 * to the user that there is a potential performance impact due to
	 * dynamic zone write plug allocation when simultaneously writing to
	 * more zones than the size of the mempool.
	 */
	pool_size = max(lim.max_open_zones, lim.max_active_zones);
	if (!pool_size)
		pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);

	mempool_resize(disk->zone_wplugs_pool, pool_size);

	if (!lim.max_open_zones && !lim.max_active_zones) {
		if (pool_size < nr_seq_zones)
			lim.max_open_zones = pool_size;
		else
			lim.max_open_zones = 0;
	}

commit:
	return queue_limits_commit_update_frozen(q, &lim);
}

/*
 * Validate a conventional zone and, if the disk needs zone resources, set
 * its bit in the conventional zones bitmap of @args, allocating the bitmap
 * on first use.
 */
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
				    struct blk_revalidate_zone_args *args)
{
	struct gendisk *disk = args->disk;

	if (zone->capacity != zone->len) {
		pr_warn("%s: Invalid conventional zone capacity\n",
			disk->disk_name);
		return -ENODEV;
	}

	if (disk_zone_is_last(disk, zone))
		args->last_zone_capacity = zone->capacity;

	if (!disk_need_zone_resources(disk))
		return 0;

	if (!args->conv_zones_bitmap) {
		args->conv_zones_bitmap =
			bitmap_zalloc(args->nr_zones, GFP_NOIO);
		if (!args->conv_zones_bitmap)
			return -ENOMEM;
	}

	set_bit(idx, args->conv_zones_bitmap);

	return 0;
}

/*
 * Validate a sequential write required zone and, for devices emulating zone
 * append, make sure a zone write plug exists for a partially written zone.
 */
static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
				   struct blk_revalidate_zone_args *args)
{
	struct gendisk *disk = args->disk;
	struct blk_zone_wplug *zwplug;
	unsigned int wp_offset;
	unsigned long flags;

	/*
	 * Remember the capacity of the first sequential zone and check
	 * if it is constant for all zones, ignoring the last zone as it can be
	 * smaller.
	 */
	if (!args->zone_capacity)
		args->zone_capacity = zone->capacity;
	if (disk_zone_is_last(disk, zone)) {
		args->last_zone_capacity = zone->capacity;
	} else if (zone->capacity != args->zone_capacity) {
		pr_warn("%s: Invalid variable zone capacity\n",
			disk->disk_name);
		return -ENODEV;
	}

	/*
	 * If the device needs zone append emulation, we need to track the
	 * write pointer of all zones that are not empty nor full. So make sure
	 * we have a zone write plug for such zone if the device has a zone
	 * write plug hash table.
	 */
	if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash)
		return 0;

	disk_zone_wplug_sync_wp_offset(disk, zone);

	/* Empty and full zones do not need a zone write plug. */
	wp_offset = blk_zone_wp_offset(zone);
	if (!wp_offset || wp_offset >= zone->capacity)
		return 0;

	zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags);
	if (!zwplug)
		return -ENOMEM;
	spin_unlock_irqrestore(&zwplug->lock, flags);
	disk_put_zone_wplug(zwplug);

	return 0;
}

/*
 * Helper function to check the validity of zones of a zoned block device.
 */
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
				  void *data)
{
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	sector_t zone_sectors = disk->queue->limits.chunk_sectors;
	int ret;

	/* Check for bad zones and holes in the zone report */
	if (zone->start != args->sector) {
		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
			disk->disk_name, args->sector, zone->start);
		return -ENODEV;
	}

	if (zone->start >= get_capacity(disk) || !zone->len) {
		pr_warn("%s: Invalid zone start %llu, length %llu\n",
			disk->disk_name, zone->start, zone->len);
		return -ENODEV;
	}

	/*
	 * All zones must have the same size, with the exception of a possibly
	 * smaller last zone.
	 */
	if (!disk_zone_is_last(disk, zone)) {
		if (zone->len != zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	} else if (zone->len > zone_sectors) {
		pr_warn("%s: Invalid zoned device with larger last zone size\n",
			disk->disk_name);
		return -ENODEV;
	}

	if (!zone->capacity || zone->capacity > zone->len) {
		pr_warn("%s: Invalid zone capacity\n",
			disk->disk_name);
		return -ENODEV;
	}

	/* Check zone type */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		ret = blk_revalidate_conv_zone(zone, idx, args);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
		ret = blk_revalidate_seq_zone(zone, idx, args);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
	default:
		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
			disk->disk_name, (int)zone->type, zone->start);
		ret = -ENODEV;
	}

	/* Only account for the zone if it passed all checks. */
	if (!ret)
		args->sector += zone->len;

	return ret;
}

/**
 * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs
 * @disk:	Target disk
 *
 * Helper function for low-level device drivers to check, (re) allocate and
 * initialize resources used for managing zoned disks. This function should
 * normally be called by blk-mq based drivers when a zoned gendisk is probed
 * and when the zone configuration of the gendisk changes (e.g. after a format).
 * Before calling this function, the device driver must already have set the
 * device zone size (chunk_sector limit) and the max zone append limit.
 * BIO based drivers can also use this function as long as the device queue
 * can be safely frozen.
 */
int blk_revalidate_disk_zones(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	sector_t zone_sectors = q->limits.chunk_sectors;
	sector_t capacity = get_capacity(disk);
	struct blk_revalidate_zone_args args = { };
	unsigned int noio_flag;
	int ret = -ENOMEM;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return -EIO;

	if (!capacity)
		return -ENODEV;

	/*
	 * Checks that the device driver indicated a valid zone size and that
	 * the max zone append limit is set.
	 */
	if (!zone_sectors || !is_power_of_2(zone_sectors)) {
		pr_warn("%s: Invalid non power of two zone size (%llu)\n",
			disk->disk_name, zone_sectors);
		return -ENODEV;
	}

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	args.disk = disk;
	args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
	noio_flag = memalloc_noio_save();
	ret = disk_revalidate_zone_resources(disk, args.nr_zones);
	if (ret) {
		memalloc_noio_restore(noio_flag);
		return ret;
	}

	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
				       blk_revalidate_zone_cb, &args);
	if (!ret) {
		pr_warn("%s: No zones reported\n", disk->disk_name);
		ret = -ENODEV;
	}
	memalloc_noio_restore(noio_flag);

	/*
	 * If zones were reported, make sure that the entire disk capacity
	 * has been checked.
	 */
	if (ret > 0 && args.sector != capacity) {
		pr_warn("%s: Missing zones from sector %llu\n",
			disk->disk_name, args.sector);
		ret = -ENODEV;
	}

	/*
	 * Set the new disk zone parameters only once the queue is frozen and
	 * all I/Os are completed.
	 */
	if (ret > 0)
		ret = disk_update_zone_resources(disk, &args);
	else
		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
	if (ret) {
		unsigned int memflags = blk_mq_freeze_queue(q);

		disk_free_zone_resources(disk);
		blk_mq_unfreeze_queue(q, memflags);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);

/**
 * blk_zone_issue_zeroout - zero-fill a block range in a zone
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 *
 * Description:
 *  Zero-fill a block range in a zone (@sector must be equal to the zone write
 *  pointer), handling potential errors due to the (initially unknown) lack of
 *  hardware offload (See blkdev_issue_zeroout()).
*/
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
			   sector_t nr_sects, gfp_t gfp_mask)
{
	int ret;

	if (WARN_ON_ONCE(!bdev_is_zoned(bdev)))
		return -EIO;

	/* First try without fallback; only -EOPNOTSUPP leads to a retry. */
	ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
				   BLKDEV_ZERO_NOFALLBACK);
	if (ret != -EOPNOTSUPP)
		return ret;

	/*
	 * The failed call to blkdev_issue_zeroout() advanced the zone write
	 * pointer. Undo this using a report zone to update the zone write
	 * pointer to the correct current value.
	 */
	ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector);
	if (ret != 1)
		return ret < 0 ? ret : -EIO;

	/*
	 * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a
	 * regular write with zero-pages.
	 */
	return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0);
}
EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout);

#ifdef CONFIG_BLK_DEBUG_FS
/*
 * Print one line describing @zwplug to @m: zone number, flags (hexadecimal),
 * reference count, write pointer offset and number of plugged BIOs. The
 * values are sampled under the zone write plug lock and printed after the
 * lock is released.
 */
static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
				  struct seq_file *m)
{
	unsigned int zwp_wp_offset, zwp_flags;
	unsigned int zwp_zone_no, zwp_ref;
	unsigned int zwp_bio_list_size;
	unsigned long flags;

	spin_lock_irqsave(&zwplug->lock, flags);
	zwp_zone_no = zwplug->zone_no;
	zwp_flags = zwplug->flags;
	zwp_ref = refcount_read(&zwplug->ref);
	zwp_wp_offset = zwplug->wp_offset;
	zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
	spin_unlock_irqrestore(&zwplug->lock, flags);

	seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref,
		   zwp_wp_offset, zwp_bio_list_size);
}

/*
 * Show all the zone write plugs of the disk of request queue @data, walking
 * every hash table bucket under rcu_read_lock(). Always returns 0.
 */
int queue_zone_wplugs_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct gendisk *disk = q->disk;
	struct blk_zone_wplug *zwplug;
	unsigned int i;

	if (!disk->zone_wplugs_hash)
		return 0;

	rcu_read_lock();
	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
		hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i],
					 node)
			queue_zone_wplug_show(zwplug, m);
	rcu_read_unlock();

	return 0;
}

#endif
16 2 1 11 2 15 4 12 2 1 1 1 1 4 1 1 2 2 1 1 3 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
// SPDX-License-Identifier: GPL-2.0-or-later
/* Large capacity key type
 *
 * Copyright (C) 2017-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) "big_key: "fmt
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/file.h>
#include <linux/shmem_fs.h>
#include <linux/err.h>
#include <linux/random.h>
#include <keys/user-type.h>
#include <keys/big_key-type.h>
#include <crypto/chacha20poly1305.h>

/*
 * Layout of key payload words.
 *
 * @data holds either the key data itself (small keys) or the encryption key
 * of the backing shmem file (keys above BIG_KEY_FILE_THRESHOLD), in which
 * case @path pins that file. @length is the length of the key data.
 */
struct big_key_payload {
	u8 *data;
	struct path path;
	size_t length;
};
#define to_big_key_payload(payload)			\
	(struct big_key_payload *)((payload).data)

/*
 * If the data is under this limit, there's no point creating a shm file to
 * hold it as the permanently resident metadata for the shmem fs will be at
 * least as large as the data.
 */
#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))

/*
 * big_key defined keys take an arbitrary string as the description and an
 * arbitrary blob of data as the payload
 */
struct key_type key_type_big_key = {
	.name			= "big_key",
	.preparse		= big_key_preparse,
	.free_preparse		= big_key_free_preparse,
	.instantiate		= generic_key_instantiate,
	.revoke			= big_key_revoke,
	.destroy		= big_key_destroy,
	.describe		= big_key_describe,
	.read			= big_key_read,
	.update			= big_key_update,
};

/*
 * Preparse a big key
 *
 * Data of at most BIG_KEY_FILE_THRESHOLD bytes is copied into a kmalloc'ed
 * buffer. Larger data is encrypted with a freshly generated random key and
 * written to a shmem file; the payload then holds the encryption key and the
 * path of that file.
 *
 * Returns 0 on success, -EINVAL if the data is missing, empty or larger than
 * 1 MiB, -ENOMEM on allocation failure, or the error of the failing step.
 */
int big_key_preparse(struct key_preparsed_payload *prep)
{
	struct big_key_payload *payload = to_big_key_payload(prep->payload);
	struct file *file;
	u8 *buf, *enckey;
	ssize_t written;
	size_t datalen = prep->datalen;
	size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE;
	int ret;

	/* The payload struct is overlaid on the preparse payload words. */
	BUILD_BUG_ON(sizeof(*payload) != sizeof(prep->payload.data));

	if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
		return -EINVAL;

	/* Set an arbitrary quota */
	prep->quotalen = 16;

	payload->length = datalen;

	if (datalen > BIG_KEY_FILE_THRESHOLD) {
		/* Create a shmem file to store the data in.  This will permit the data
		 * to be swapped out if needed.
		 *
		 * File content is stored encrypted with randomly generated key.
		 * Since the key is random for each file, we can set the nonce
		 * to zero, provided we never define a ->update() call.
		 */
		loff_t pos = 0;

		buf = kvmalloc(enclen, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		/* generate random key */
		enckey = kmalloc(CHACHA20POLY1305_KEY_SIZE, GFP_KERNEL);
		if (!enckey) {
			ret = -ENOMEM;
			goto error;
		}
		ret = get_random_bytes_wait(enckey, CHACHA20POLY1305_KEY_SIZE);
		if (unlikely(ret))
			goto err_enckey;

		/* encrypt data */
		chacha20poly1305_encrypt(buf, prep->data, datalen, NULL, 0,
					 0, enckey);

		/* save aligned data to file */
		file = shmem_kernel_file_setup("", enclen, 0);
		if (IS_ERR(file)) {
			ret = PTR_ERR(file);
			goto err_enckey;
		}

		/* A short write without an error code is reported as -EIO. */
		written = kernel_write(file, buf, enclen, &pos);
		if (written != enclen) {
			ret = written;
			if (written >= 0)
				ret = -EIO;
			goto err_fput;
		}

		/* Pin the mount and dentry to the key so that we can open it again
		 * later
		 */
		payload->data = enckey;
		payload->path = file->f_path;
		path_get(&payload->path);
		fput(file);
		kvfree_sensitive(buf, enclen);
	} else {
		/* Just store the data in a buffer */
		void *data = kmalloc(datalen, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		payload->data = data;
		memcpy(data, prep->data, prep->datalen);
	}
	return 0;

err_fput:
	fput(file);
err_enckey:
	kfree_sensitive(enckey);
error:
	kvfree_sensitive(buf, enclen);
	return ret;
}

/*
 * Clear preparsement.
*/
void big_key_free_preparse(struct key_preparsed_payload *prep)
{
	struct big_key_payload *payload = to_big_key_payload(prep->payload);

	/* Only file-backed payloads hold a path reference. */
	if (prep->datalen > BIG_KEY_FILE_THRESHOLD)
		path_put(&payload->path);
	kfree_sensitive(payload->data);
}

/*
 * dispose of the links from a revoked keyring
 * - called with the key sem write-locked
 */
void big_key_revoke(struct key *key)
{
	struct big_key_payload *payload = to_big_key_payload(key->payload);

	/* clear the quota */
	key_payload_reserve(key, 0);
	/* Truncate the backing file of a positive, file-backed key. */
	if (key_is_positive(key) && payload->length > BIG_KEY_FILE_THRESHOLD)
		vfs_truncate(&payload->path, 0);
}

/*
 * dispose of the data dangling from the corpse of a big_key key
 */
void big_key_destroy(struct key *key)
{
	struct big_key_payload *payload = to_big_key_payload(key->payload);

	if (payload->length > BIG_KEY_FILE_THRESHOLD) {
		path_put(&payload->path);
		payload->path.mnt = NULL;
		payload->path.dentry = NULL;
	}
	kfree_sensitive(payload->data);
	payload->data = NULL;
}

/*
 * Update a big key
 *
 * Reserves quota for the new data, destroys the current payload if the key
 * is positive and then instantiates the key from @prep.
 */
int big_key_update(struct key *key, struct key_preparsed_payload *prep)
{
	int ret;

	ret = key_payload_reserve(key, prep->datalen);
	if (ret < 0)
		return ret;

	if (key_is_positive(key))
		big_key_destroy(key);

	return generic_key_instantiate(key, prep);
}

/*
 * describe the big_key key
 */
void big_key_describe(const struct key *key, struct seq_file *m)
{
	struct big_key_payload *payload = to_big_key_payload(key->payload);

	seq_puts(m, key->description);

	if (key_is_positive(key))
		seq_printf(m, ": %zu [%s]",
			   payload->length,
			   payload->length > BIG_KEY_FILE_THRESHOLD ? "file" : "buff");
}

/*
 * read the key data
 * - the key's semaphore is read-locked
 *
 * Returns the data length. Nothing is copied if @buffer is NULL or @buflen
 * is smaller than the data length. For file-backed keys a negative error
 * code is returned if the file cannot be read or decrypted.
 */
long big_key_read(const struct key *key, char *buffer, size_t buflen)
{
	struct big_key_payload *payload = to_big_key_payload(key->payload);
	size_t datalen = payload->length;
	long ret;

	if (!buffer || buflen < datalen)
		return datalen;

	if (datalen > BIG_KEY_FILE_THRESHOLD) {
		struct file *file;
		u8 *buf, *enckey = payload->data;
		size_t enclen = datalen + CHACHA20POLY1305_AUTHTAG_SIZE;
		loff_t pos = 0;

		buf = kvmalloc(enclen, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		file = dentry_open(&payload->path, O_RDONLY, current_cred());
		if (IS_ERR(file)) {
			ret = PTR_ERR(file);
			goto error;
		}

		/* read file to kernel and decrypt */
		ret = kernel_read(file, buf, enclen, &pos);
		if (ret != enclen) {
			/* A short read without an error code becomes -EIO. */
			if (ret >= 0)
				ret = -EIO;
			goto err_fput;
		}

		ret = chacha20poly1305_decrypt(buf, buf, enclen, NULL, 0, 0,
					       enckey) ? 0 : -EBADMSG;
		if (unlikely(ret))
			goto err_fput;

		ret = datalen;

		/* copy out decrypted data */
		memcpy(buffer, buf, datalen);

		/* The success path falls through the cleanup labels too. */
err_fput:
		fput(file);
error:
		kvfree_sensitive(buf, enclen);
	} else {
		ret = datalen;
		memcpy(buffer, payload->data, datalen);
	}

	return ret;
}

/*
 * Register key type
 */
static int __init big_key_init(void)
{
	return register_key_type(&key_type_big_key);
}

late_initcall(big_key_init);
6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 
2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 
3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 /* * 
Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/xarray.h>
#include <rdma/ib_cache.h>

#include "mad_priv.h"
#include "core_priv.h"
#include "mad_rmpp.h"
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"

#define CREATE_TRACE_POINTS
#include <trace/events/ib_mad.h>

#ifdef CONFIG_TRACEPOINTS
/*
 * Fill the address-related fields of a MAD send trace entry (sl, rqpn,
 * rqkey, dlid) from the send work request and from the attributes queried
 * out of its address handle.
 *
 * @qp_info is accepted but not referenced in this function body.
 * Only compiled when CONFIG_TRACEPOINTS is defined.
 */
static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
			  struct ib_mad_qp_info *qp_info,
			  struct trace_event_raw_ib_mad_send_template *entry)
{
	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
	struct rdma_ah_attr attr = {};

	rdma_query_ah(wr->ah, &attr);

	/* These are common */
	entry->sl = attr.sl;
	entry->rqpn = wr->remote_qpn;
	entry->rqkey = wr->remote_qkey;
	entry->dlid = rdma_ah_get_dlid(&attr);
}
#endif

/*
 * Send/receive queue sizes.  Exposed as the module parameters
 * "send_queue_size" and "recv_queue_size" with mode 0444.
 */
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;

module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");

/*
 * Registered agents.  ib_register_mad_agent() stores each agent here via
 * xa_alloc_cyclic(), writing the allocated ID into agent.hi_tid and using
 * ib_mad_client_next as the cyclic cursor.
 */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
/* List walked by __ib_get_mad_port() to find a port by device/port_num */
static struct list_head ib_mad_port_list;

/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);

/* Forward declarations */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req);
static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
static struct ib_mad_agent_private *find_mad_agent(
					struct ib_mad_port_private *port_priv,
					const struct ib_mad_hdr *mad);
static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
				    struct ib_mad_private *mad);
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
static void timeout_sends(struct work_struct *work);
static void local_completions(struct work_struct *work);
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			      struct ib_mad_agent_private *agent_priv,
			      u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv);
static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
			      struct ib_wc *wc);
static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);

/*
 * Returns a ib_mad_port_private structure or NULL for a device/port
 * Assumes ib_mad_port_list_lock is being held
 */
static inline struct ib_mad_port_private *
__ib_get_mad_port(struct ib_device *device, u32 port_num)
{
	struct ib_mad_port_private *entry;

	/* Linear scan; the first entry matching both device and port wins */
	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
		if (entry->device == device && entry->port_num == port_num)
			return entry;
	}
	return NULL;
}

/*
 * Wrapper function to return a ib_mad_port_private structure or NULL
 * for a device/port
 *
 * Takes ib_mad_port_list_lock (IRQ-saving variant) around the lookup.
 * The lock is released before returning, so the returned pointer is
 * used by callers without this lock held.
 */
static inline struct ib_mad_port_private *
ib_get_mad_port(struct ib_device *device, u32 port_num)
{
	struct ib_mad_port_private *entry;
	unsigned long flags;

	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
	entry = __ib_get_mad_port(device, port_num);
	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);

	return entry;
}

/*
 * Map a management class to the value used as a table index:
 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE becomes 0, every other class is
 * returned unchanged.
 */
static inline u8 convert_mgmt_class(u8 mgmt_class)
{
	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
		0 : mgmt_class;
}

/*
 * Translate a special QP type into an index: IB_QPT_SMI -> 0,
 * IB_QPT_GSI -> 1.  Any other type yields -1.
 * ib_register_mad_agent() uses the result to index port_priv->qp_info[].
 */
static int get_spl_qp_index(enum ib_qp_type qp_type)
{
	switch (qp_type) {
	case IB_QPT_SMI:
		return 0;
	case IB_QPT_GSI:
		return 1;
	default:
		return -1;
	}
}

/*
 * Offset of @mgmt_class from IB_MGMT_CLASS_VENDOR_RANGE2_START.
 * No range check is done here; the visible caller only uses it after
 * is_vendor_class() returned true.
 */
static int vendor_class_index(u8 mgmt_class)
{
	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
}

/*
 * Return 1 if @mgmt_class lies within the inclusive range
 * [IB_MGMT_CLASS_VENDOR_RANGE2_START, IB_MGMT_CLASS_VENDOR_RANGE2_END],
 * otherwise 0.
 */
static int is_vendor_class(u8 mgmt_class)
{
	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
		return 0;
	return 1;
}

/*
 * Return 1 if any of the first three bytes of @oui is non-zero,
 * otherwise 0.
 */
static int is_vendor_oui(char *oui)
{
	if (oui[0] || oui[1] || oui[2])
		return 1;
	return 0;
}

/*
 * Check whether the methods requested in @mad_reg_req collide with an
 * existing registration for the same OUI in @vendor_class.
 *
 * Scans the MAX_MGMT_OUI slots for one whose 3-byte OUI equals
 * mad_reg_req->oui.  At the first such slot that has a method table,
 * the answer is taken from method_in_use(): 1 on conflict, otherwise the
 * scan stops and 0 is returned.  Matching slots without a method table
 * are skipped.
 */
static int is_vendor_method_in_use(
		struct ib_mad_mgmt_vendor_class *vendor_class,
		struct ib_mad_reg_req *mad_reg_req)
{
	struct ib_mad_mgmt_method_table *method;
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++) {
		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
			method = vendor_class->method_table[i];
			if (method) {
				if (method_in_use(&method, mad_reg_req))
					return 1;
				else
					break;
			}
		}
	}
	return 0;
}

/*
 * Return non-zero if the MAD header describes a response, i.e. if any
 * of the following holds:
 *  - the IB_MGMT_METHOD_RESP bit is set in hdr->method,
 *  - hdr->method equals IB_MGMT_METHOD_TRAP_REPRESS,
 *  - the class is IB_MGMT_CLASS_BM and IB_BM_ATTR_MOD_RESP is set in
 *    hdr->attr_mod.
 */
int ib_response_mad(const struct ib_mad_hdr *hdr)
{
	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
}
EXPORT_SYMBOL(ib_response_mad);

/* Divisors applied to the receive queue size in get_sol_fc_max_outstanding() */
#define SOL_FC_MAX_DEFAULT_FRAC 4
#define SOL_FC_MAX_SA_FRAC 32

/*
 * Compute the value ib_register_mad_agent() stores in an agent's
 * sol_fc_max field, as a fraction of the receive queue size:
 *  - no registration request, or class CM: mad_recvq_size / 4
 *  - class SUBN_ADM:                       mad_recvq_size / 32
 *  - SUBN_LID_ROUTED / SUBN_DIRECTED_ROUTE:
 *        min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) / 4
 *  - any other class:                      0
 *
 * NOTE(review): judging by the name this is a cap on outstanding
 * solicited MADs and 0 presumably means "no limit" - confirm against the
 * code that consumes sol_fc_max.
 */
static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req)
{
	if (!mad_reg_req)
		/* Send only agent */
		return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;

	switch (mad_reg_req->mgmt_class) {
	case IB_MGMT_CLASS_CM:
		return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
	case IB_MGMT_CLASS_SUBN_ADM:
		return mad_recvq_size / SOL_FC_MAX_SA_FRAC;
	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
		return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) /
		       SOL_FC_MAX_DEFAULT_FRAC;
	default:
		return 0;
	}
}

/*
 * ib_register_mad_agent - Register to send/receive MADs
 *
 * Context: Process context.
*/
/*
 * Implementation notes for ib_register_mad_agent():
 *
 * Validates the arguments, allocates a struct ib_mad_agent_private,
 * publishes it in the ib_mad_clients xarray (the allocated ID is written
 * to agent.hi_tid) and, when @mad_reg_req is supplied, adds the
 * registration to the port's class/vendor tables under reg_lock.
 *
 * Returns a pointer to the embedded struct ib_mad_agent on success, or an
 * ERR_PTR():
 *   -EPROTONOSUPPORT  the device/port lacks the capability checked for
 *                     @qp_type, or the port has no QP at the chosen index
 *   -EINVAL           a parameter check failed, or the requested methods
 *                     are already in use (the initial value of ret)
 *   -ENODEV           no MAD port found for @device/@port_num
 *   -ENOMEM           allocation of the agent or of the reg_req copy failed
 *   other             whatever ib_mad_agent_security_setup(),
 *                     xa_alloc_cyclic() or add_*_reg_req() returned
 *
 * The error labels unwind in reverse order of acquisition.
 */
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
					   u32 port_num,
					   enum ib_qp_type qp_type,
					   struct ib_mad_reg_req *mad_reg_req,
					   u8 rmpp_version,
					   ib_mad_send_handler send_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context,
					   u32 registration_flags)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_reg_req *reg_req = NULL;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	struct ib_mad_mgmt_method_table *method;
	int ret2, qpn;
	u8 mgmt_class, vclass;

	/* Reject QP types the device/port does not have the capability for */
	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
		return ERR_PTR(-EPROTONOSUPPORT);

	/* Validate parameters */
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
				    __func__, qp_type);
		goto error1;
	}

	/* rmpp_version is either 0 or exactly IB_MGMT_RMPP_VERSION */
	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		dev_dbg_ratelimited(&device->dev,
				    "%s: invalid RMPP Version %u\n",
				    __func__, rmpp_version);
		goto error1;
	}

	/* Validate MAD registration request if supplied */
	if (mad_reg_req) {
		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: invalid Class Version %u\n",
					    __func__,
					    mad_reg_req->mgmt_class_version);
			goto error1;
		}
		/* A registration request requires a receive handler */
		if (!recv_handler) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: no recv_handler\n", __func__);
			goto error1;
		}
		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
			/*
			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
			 * one in this range currently allowed
			 */
			if (mad_reg_req->mgmt_class !=
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid Mgmt Class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else if (mad_reg_req->mgmt_class == 0) {
			/*
			 * Class 0 is reserved in IBA and is used for
			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
			 */
			dev_dbg_ratelimited(&device->dev,
					    "%s: Invalid Mgmt Class 0\n",
					    __func__);
			goto error1;
		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
			/*
			 * If class is in "new" vendor range,
			 * ensure supplied OUI is not zero
			 */
			if (!is_vendor_oui(mad_reg_req->oui)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: No OUI specified for class 0x%x\n",
					__func__,
					mad_reg_req->mgmt_class);
				goto error1;
			}
		}
		/* Make sure class supplied is consistent with RMPP */
		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
			if (rmpp_version) {
				dev_dbg_ratelimited(&device->dev,
					"%s: RMPP version for non-RMPP class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}

		/* Make sure class supplied is consistent with QP type */
		if (qp_type == IB_QPT_SMI) {
			/* SMI accepts only the two subnet management classes */
			if ((mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
			    (mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid SM QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else {
			/* ... and the other QP type accepts neither of them */
			if ((mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
			    (mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid GS QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}
	} else {
		/* No registration request supplied */
		if (!send_handler)
			goto error1;
		if (registration_flags & IB_MAD_USER_RMPP)
			goto error1;
	}

	/* Validate device and port */
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
				    __func__, port_num);
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}

	/* Verify the QP requested is supported. For example, Ethernet devices
	 * will not have QP0.
	 */
	if (!port_priv->qp_info[qpn].qp) {
		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
				    __func__, qpn);
		ret = ERR_PTR(-EPROTONOSUPPORT);
		goto error1;
	}

	/* Allocate structures */
	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
	if (!mad_agent_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	/* Keep a private copy of the caller's registration request */
	if (mad_reg_req) {
		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
		if (!reg_req) {
			ret = ERR_PTR(-ENOMEM);
			goto error3;
		}
	}

	/* Now, fill in the various structures */
	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
	mad_agent_priv->reg_req = reg_req;
	mad_agent_priv->agent.rmpp_version = rmpp_version;
	mad_agent_priv->agent.device = device;
	mad_agent_priv->agent.recv_handler = recv_handler;
	mad_agent_priv->agent.send_handler = send_handler;
	mad_agent_priv->agent.context = context;
	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_agent_priv->agent.port_num = port_num;
	mad_agent_priv->agent.flags = registration_flags;
	spin_lock_init(&mad_agent_priv->lock);
	INIT_LIST_HEAD(&mad_agent_priv->send_list);
	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
	INIT_LIST_HEAD(&mad_agent_priv->backlog_list);
	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
	INIT_LIST_HEAD(&mad_agent_priv->local_list);
	INIT_WORK(&mad_agent_priv->local_work, local_completions);
	/*
	 * Initial reference; dropped by deref_mad_agent() in
	 * unregister_mad_agent(), which then waits on ->comp.
	 */
	refcount_set(&mad_agent_priv->refcount, 1);
	init_completion(&mad_agent_priv->comp);
	mad_agent_priv->sol_fc_send_count = 0;
	mad_agent_priv->sol_fc_wait_count = 0;
	/* Agents without a receive handler get sol_fc_max == 0 */
	mad_agent_priv->sol_fc_max =
		recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0;

	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
	if (ret2) {
		ret = ERR_PTR(ret2);
		goto error4;
	}

	/*
	 * The mlx4 driver uses the top byte to distinguish which virtual
	 * function generated the MAD, so we must avoid using it.
	 */
	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
			&ib_mad_client_next, GFP_KERNEL);
	/* Only negative values are failures; a positive return is accepted */
	if (ret2 < 0) {
		ret = ERR_PTR(ret2);
		goto error5;
	}

	/*
	 * Make sure MAD registration (if supplied)
	 * is non overlapping with any existing ones
	 */
	spin_lock_irq(&port_priv->reg_lock);
	if (mad_reg_req) {
		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
		if (!is_vendor_class(mgmt_class)) {
			class = port_priv->version[mad_reg_req->
						   mgmt_class_version].class;
			if (class) {
				method = class->method_table[mgmt_class];
				if (method) {
					/*
					 * Conflict: ret still holds its
					 * initial ERR_PTR(-EINVAL).
					 */
					if (method_in_use(&method,
							   mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
						  mgmt_class);
		} else {
			/* "New" vendor class range */
			vendor = port_priv->version[mad_reg_req->
						    mgmt_class_version].vendor;
			if (vendor) {
				vclass = vendor_class_index(mgmt_class);
				vendor_class = vendor->vendor_class[vclass];
				if (vendor_class) {
					/* Conflict: returns -EINVAL as above */
					if (is_vendor_method_in_use(
							vendor_class,
							mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
		}
		if (ret2) {
			ret = ERR_PTR(ret2);
			goto error6;
		}
	}
	spin_unlock_irq(&port_priv->reg_lock);

	trace_ib_mad_create_agent(mad_agent_priv);
	return &mad_agent_priv->agent;
error6:
	/* reg_lock is held on every path that jumps here */
	spin_unlock_irq(&port_priv->reg_lock);
	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
error5:
	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
	/* reg_req is NULL when no registration request was supplied */
	kfree(reg_req);
error3:
	kfree(mad_agent_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_agent);

/*
 * Drop one reference on the agent.  When the count reaches zero, signal
 * ->comp, which unregister_mad_agent() waits on before freeing the agent.
 */
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	if (refcount_dec_and_test(&mad_agent_priv->refcount))
		complete(&mad_agent_priv->comp);
}

/*
 * Tear down a registered agent: cancel its sends and timeout work, remove
 * its registration and xarray entry, flush the port workqueue, drop the
 * initial reference and wait for all remaining references to go away,
 * then release the agent's resources.
 */
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_port_private *port_priv;

	/* Note that we could still be handling received MADs */
	trace_ib_mad_unregister_agent(mad_agent_priv);

	/*
	 * Canceling all sends results in dropping received response
	 * MADs, preventing us from
queuing additional work */ cancel_mads(mad_agent_priv); port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); spin_unlock_irq(&port_priv->reg_lock); xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); kfree(mad_agent_priv->reg_req); kfree_rcu(mad_agent_priv, rcu); } /* * ib_unregister_mad_agent - Unregisters a client from using MAD services * * Context: Process context. */ void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); unregister_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_unregister_mad_agent); static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; unsigned long flags; mad_queue = mad_list->mad_queue; spin_lock_irqsave(&mad_queue->lock, flags); list_del(&mad_list->list); mad_queue->count--; spin_unlock_irqrestore(&mad_queue->lock, flags); } static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, u16 pkey_index, u32 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); wc->wr_cqe = cqe; wc->status = IB_WC_SUCCESS; wc->opcode = IB_WC_RECV; wc->pkey_index = pkey_index; wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); wc->src_qp = IB_QP0; wc->qp = qp; wc->slid = slid; wc->sl = 0; wc->dlid_path_bits = 0; wc->port_num = port_num; } static size_t mad_priv_size(const struct ib_mad_private *mp) { return sizeof(struct ib_mad_private) + mp->mad_size; } static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) { size_t size = sizeof(struct ib_mad_private) + mad_size; struct ib_mad_private *ret = kzalloc(size, flags); if (ret) 
ret->mad_size = mad_size; return ret; } static size_t port_mad_size(const struct ib_mad_port_private *port_priv) { return rdma_max_mad_size(port_priv->device, port_priv->port_num); } static size_t mad_priv_dma_size(const struct ib_mad_private *mp) { return sizeof(struct ib_grh) + mp->mad_size; } /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr) { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; u32 port_num; struct ib_wc mad_wc; struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); u16 out_mad_pkey_index = 0; u16 drslid; bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; /* * Directed route handling starts if the initial LID routed part of * a request or the ending LID routed part of a response is empty. * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; trace_ib_mad_handle_out_opa_smi(opa_smp); if ((opa_get_smp_direction(opa_smp) ? 
opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
		     OPA_LID_PERMISSIVE &&
		     opa_smi_handle_dr_smp_send(opa_smp,
						rdma_cap_ib_switch(device),
						port_num) == IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid directed route\n");
			goto out;
		}
		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
		/*
		 * Reject a dr_slid that is not the permissive LID and has any
		 * of its upper 16 bits set; only the low 16 bits are kept below.
		 */
		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
		    opa_drslid & 0xffff0000) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
			       opa_drslid);
			goto out;
		}
		drslid = (u16)(opa_drslid & 0x0000ffff);

		/* Check to post send on QP or process locally */
		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
			goto out;
	} else {
		trace_ib_mad_handle_out_ib_smi(smp);

		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
		     IB_LID_PERMISSIVE &&
		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
		     IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "Invalid directed route\n");
			goto out;
		}
		drslid = be16_to_cpu(smp->dr_slid);

		/* Check to post send on QP or process locally */
		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
			goto out;
	}

	/* Both allocations below use GFP_ATOMIC. */
	local = kmalloc(sizeof *local, GFP_ATOMIC);
	if (!local) {
		ret = -ENOMEM;
		goto out;
	}
	local->mad_priv = NULL;
	local->recv_mad_agent = NULL;
	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
	if (!mad_priv) {
		ret = -ENOMEM;
		kfree(local);
		goto out;
	}

	/* Build the work completion that is handed to process_mad() below. */
	build_smp_wc(mad_agent_priv->agent.qp,
		     send_wr->wr.wr_cqe, drslid,
		     send_wr->pkey_index,
		     send_wr->port_num, &mad_wc);

	/* For an OPA base-version MAD, byte_len reflects the actual length. */
	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
					+ mad_send_wr->send_buf.data_len
					+ sizeof(struct ib_grh);
	}

	/* No GRH for DR SMP */
	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
				      (const struct ib_mad *)smp,
				      (struct ib_mad *)mad_priv->mad, &mad_size,
				      &out_mad_pkey_index);
	switch (ret) {
	case IB_MAD_RESULT_SUCCESS |
IB_MAD_RESULT_REPLY:
		/*
		 * Keep the output MAD for local receive processing only if it
		 * is a response and this agent has a receive handler;
		 * otherwise free it.
		 */
		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
		    mad_agent_priv->agent.recv_handler) {
			local->mad_priv = mad_priv;
			local->recv_mad_agent = mad_agent_priv;
			/*
			 * Reference MAD agent until receive
			 * side of local completion handled
			 */
			refcount_inc(&mad_agent_priv->refcount);
		} else
			kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
		kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS:
		/* Treat like an incoming receive MAD */
		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
					    mad_agent_priv->agent.port_num);
		if (port_priv) {
			/* Route a copy of the outgoing SMP to a local agent. */
			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
			recv_mad_agent = find_mad_agent(port_priv,
						        (const struct ib_mad_hdr *)mad_priv->mad);
		}
		if (!port_priv || !recv_mad_agent) {
			/*
			 * No receiving agent so drop packet and
			 * generate send completion.
			 */
			kfree(mad_priv);
			break;
		}
		local->mad_priv = mad_priv;
		local->recv_mad_agent = recv_mad_agent;
		break;
	default:
		/* Any other process_mad() result is reported as -EINVAL. */
		kfree(mad_priv);
		kfree(local);
		ret = -EINVAL;
		goto out;
	}

	local->mad_send_wr = mad_send_wr;
	if (opa) {
		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
		local->return_wc_byte_len = mad_size;
	}
	/* Reference MAD agent until send side of local completion handled */
	refcount_inc(&mad_agent_priv->refcount);
	/* Queue local completion to local list */
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	queue_work(mad_agent_priv->qp_info->port_priv->wq,
		   &mad_agent_priv->local_work);
	/* Tell the caller the SMP was consumed locally. */
	ret = 1;
out:
	return ret;
}

/*
 * get_pad_size - Number of pad bytes for a message.
 *
 * The segment size is @mad_size - @hdr_len.  When both @data_len and the
 * segment size are non-zero, the result is what is needed to round
 * @data_len up to a multiple of the segment size (0 if it already is one).
 * Otherwise the segment size itself is returned.
 */
static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
{
	int seg_size, pad;

	seg_size = mad_size - hdr_len;
	if (data_len && seg_size) {
		pad = seg_size - data_len % seg_size;
		return pad == seg_size ?
0 : pad;
	} else
		return seg_size;
}

/* Unlink and free every segment on @mad_send_wr's rmpp_list. */
static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_rmpp_segment *s, *t;

	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
		list_del(&s->list);
		kfree(s);
	}
}

/*
 * alloc_send_rmpp_list - Allocate the data segments for an RMPP send.
 *
 * Sets the segment sizes in the send buffer, allocates enough segments to
 * hold data_len plus the pad, zeroes the pad at the end of the last
 * segment, initialises the RMPP header (version, DATA type, ACTIVE flag)
 * and points cur_seg/last_ack_seg at the first segment.
 *
 * Returns 0 on success.  On allocation failure, frees the segments
 * allocated so far and returns -ENOMEM.
 */
static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
				size_t mad_size, gfp_t gfp_mask)
{
	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
	struct ib_rmpp_segment *seg = NULL;
	int left, seg_size, pad;

	send_buf->seg_size = mad_size - send_buf->hdr_len;
	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
	seg_size = send_buf->seg_size;
	pad = send_wr->pad;

	/*
	 * NOTE(review): the loop below only makes progress when seg_size > 0;
	 * presumably callers guarantee hdr_len < mad_size - confirm.
	 */
	/* Allocate data segments. */
	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
		seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
		if (!seg) {
			free_send_rmpp_list(send_wr);
			return -ENOMEM;
		}
		/* Segment numbers start at 1 and follow allocation order. */
		seg->num = ++send_buf->seg_count;
		list_add_tail(&seg->list, &send_wr->rmpp_list);
	}

	/* Zero any padding */
	if (pad)
		memset(seg->data + seg_size - pad, 0, pad);

	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
					  agent.rmpp_version;
	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);

	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
					struct ib_rmpp_segment, list);
	send_wr->last_ack_seg = send_wr->cur_seg;
	return 0;
}

/*
 * ib_mad_kernel_rmpp_agent - Non-zero when @agent has an RMPP version set
 * and does not carry the IB_MAD_USER_RMPP flag.
 */
int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
{
	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);

/*
 * ib_create_send_mad - Allocate and initialise a send buffer for
 * @mad_agent.
 *
 * Returns the new send buffer, or an ERR_PTR() value: -EINVAL when the
 * size/RMPP combination is rejected, -ENOMEM on allocation failure.
 */
struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
					   u32 remote_qpn, u16 pkey_index,
					   int rmpp_active, int hdr_len,
					   int data_len, gfp_t gfp_mask,
					   u8 base_version)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_wr_private *mad_send_wr;
	int pad, message_size, ret, size;
	void *buf;
	size_t mad_size;
	bool opa;

	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
				      agent);

	opa =
rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);

	/* An OPA base-version MAD on an OPA-capable port uses the OPA size. */
	if (opa && base_version == OPA_MGMT_BASE_VERSION)
		mad_size = sizeof(struct opa_mad);
	else
		mad_size = sizeof(struct ib_mad);

	pad = get_pad_size(hdr_len, data_len, mad_size);
	message_size = hdr_len + data_len + pad;

	/*
	 * A kernel RMPP agent may exceed one MAD only when RMPP is active.
	 * Any other agent may neither request RMPP nor exceed one MAD.
	 */
	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
		if (!rmpp_active && message_size > mad_size)
			return ERR_PTR(-EINVAL);
	} else
		if (rmpp_active || message_size > mad_size)
			return ERR_PTR(-EINVAL);

	/* With RMPP only the header lives in buf; data goes in segments. */
	size = rmpp_active ? hdr_len : mad_size;
	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	/* The private send WR is placed directly after the MAD bytes. */
	mad_send_wr = buf + size;
	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
	mad_send_wr->send_buf.mad = buf;
	mad_send_wr->send_buf.hdr_len = hdr_len;
	mad_send_wr->send_buf.data_len = data_len;
	mad_send_wr->pad = pad;

	mad_send_wr->mad_agent_priv = mad_agent_priv;
	/* sg_list[0] describes the header, sg_list[1] the payload. */
	mad_send_wr->sg_list[0].length = hdr_len;
	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;

	/* OPA MADs don't have to be the full 2048 bytes */
	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
	    data_len < mad_size - hdr_len)
		mad_send_wr->sg_list[1].length = data_len;
	else
		mad_send_wr->sg_list[1].length = mad_size - hdr_len;

	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;

	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;

	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
	mad_send_wr->send_wr.wr.num_sge = 2;
	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
	mad_send_wr->send_wr.remote_qpn = remote_qpn;
	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
	mad_send_wr->send_wr.pkey_index = pkey_index;

	if (rmpp_active) {
		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
		if (ret) {
			kfree(buf);
			return ERR_PTR(ret);
		}
	}

	mad_send_wr->send_buf.mad_agent = mad_agent;
	/* The send buffer holds an agent reference until ib_free_send_mad(). */
	refcount_inc(&mad_agent_priv->refcount);
	return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);

int
ib_get_mad_data_offset(u8 mgmt_class)
{
	/*
	 * Map the management class to a header-length constant: SA, device
	 * (DEVICE_MGMT, DEVICE_ADM, BIS), vendor range 2, or the plain MAD
	 * header for every other class.
	 */
	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
		return IB_MGMT_SA_HDR;
	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
		 (mgmt_class == IB_MGMT_CLASS_BIS))
		return IB_MGMT_DEVICE_HDR;
	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
		return IB_MGMT_VENDOR_HDR;
	else
		return IB_MGMT_MAD_HDR;
}
EXPORT_SYMBOL(ib_get_mad_data_offset);

/*
 * ib_is_mad_class_rmpp - Returns 1 for the SA, DEVICE_MGMT, DEVICE_ADM and
 * BIS classes and for classes in vendor range 2; 0 for any other class.
 */
int ib_is_mad_class_rmpp(u8 mgmt_class)
{
	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
		return 1;
	return 0;
}
EXPORT_SYMBOL(ib_is_mad_class_rmpp);

/*
 * ib_get_rmpp_segment - Return the data area of segment @seg_num of
 * @send_buf.
 *
 * Starting at the current segment, the segment list is walked forwards or
 * backwards (depending on whether @seg_num is above or below the current
 * segment number) until a segment numbered @seg_num is found.  cur_seg is
 * used as the loop cursor, so it is updated as a side effect.
 */
void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct list_head *list;

	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);
	list = &mad_send_wr->cur_seg->list;

	if (mad_send_wr->cur_seg->num < seg_num) {
		list_for_each_entry(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	} else if (mad_send_wr->cur_seg->num > seg_num) {
		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
			if (mad_send_wr->cur_seg->num == seg_num)
				break;
	}
	return mad_send_wr->cur_seg->data;
}
EXPORT_SYMBOL(ib_get_rmpp_segment);

/*
 * ib_get_payload - Payload address of @mad_send_wr: the data of segment
 * seg_num when the send buffer has segments (seg_count != 0), otherwise
 * the bytes that follow the header inside the MAD buffer.
 */
static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
{
	if (mad_send_wr->send_buf.seg_count)
		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
					   mad_send_wr->seg_num);
	else
		return mad_send_wr->send_buf.mad +
		       mad_send_wr->send_buf.hdr_len;
}

/*
 * ib_free_send_mad - Free @send_buf: its RMPP segments and its MAD buffer,
 * then drop a reference on the owning agent.
 */
void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_wr_private *mad_send_wr;

	mad_agent_priv = container_of(send_buf->mad_agent,
				      struct ib_mad_agent_private, agent);
mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
				   send_buf);

	free_send_rmpp_list(mad_send_wr);
	kfree(send_buf->mad);
	deref_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_free_send_mad);

/*
 * ib_send_mad - DMA-map the header and payload of @mad_send_wr, then post
 * it on the QP or, when the send queue is already at max_active, park it
 * on the overflow list.
 *
 * Returns 0 on success (posted or parked), -ENOMEM if a DMA mapping
 * fails, or the ib_post_send() error.  Both mappings are undone on error.
 */
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_qp_info *qp_info;
	struct list_head *list;
	struct ib_mad_agent *mad_agent;
	struct ib_sge *sge;
	unsigned long flags;
	int ret;

	/* Set WR ID to find mad_send_wr upon completion */
	qp_info = mad_send_wr->mad_agent_priv->qp_info;
	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;

	mad_agent = mad_send_wr->send_buf.mad_agent;
	sge = mad_send_wr->sg_list;
	/* sge[0] maps the start of the MAD buffer (the header). */
	sge[0].addr = ib_dma_map_single(mad_agent->device,
					mad_send_wr->send_buf.mad,
					sge[0].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
		return -ENOMEM;

	mad_send_wr->header_mapping = sge[0].addr;

	/* sge[1] maps the payload returned by ib_get_payload(). */
	sge[1].addr = ib_dma_map_single(mad_agent->device,
					ib_get_payload(mad_send_wr),
					sge[1].length,
					DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		return -ENOMEM;
	}
	mad_send_wr->payload_mapping = sge[1].addr;

	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
				   NULL);
		list = &qp_info->send_queue.list;
	} else {
		/* Queue is full: nothing is posted, so there is no error. */
		ret = 0;
		list = &qp_info->overflow_list;
	}

	/* The count is incremented for overflow entries as well. */
	if (!ret) {
		qp_info->send_queue.count++;
		list_add_tail(&mad_send_wr->mad_list.list, list);
	}
	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
	if (ret) {
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->header_mapping,
				    sge[0].length, DMA_TO_DEVICE);
		ib_dma_unmap_single(mad_agent->device,
				    mad_send_wr->payload_mapping,
				    sge[1].length,
DMA_TO_DEVICE);
	}
	return ret;
}

/*
 * handle_queued_state - Put @mad_send_wr on the agent's backlog list.
 *
 * From WAIT_RESP the entry is moved and sol_fc_wait_count is decremented;
 * otherwise the state is expected to be INIT and the entry is added.
 */
static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
		mad_agent_priv->sol_fc_wait_count--;
		list_move_tail(&mad_send_wr->agent_list,
			       &mad_agent_priv->backlog_list);
	} else {
		expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->backlog_list);
	}
}

/*
 * handle_send_state - Put @mad_send_wr on the agent's send list.
 *
 * From INIT the entry is added; otherwise (expected WAIT_RESP or QUEUED)
 * it is moved.  For a solicited flow-controlled MAD the send counter is
 * incremented, and the wait counter decremented when leaving WAIT_RESP.
 */
static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->state == IB_MAD_STATE_INIT) {
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
	} else {
		expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP,
				  IB_MAD_STATE_QUEUED);
		list_move_tail(&mad_send_wr->agent_list,
			       &mad_agent_priv->send_list);
	}

	if (mad_send_wr->is_solicited_fc) {
		if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
		mad_agent_priv->sol_fc_send_count++;
	}
}

/*
 * handle_wait_state - Put @mad_send_wr on the agent's wait list.
 *
 * The relative timeout is converted to an absolute one by adding jiffies.
 * With a non-zero delay the entry is inserted after the last entry whose
 * timeout is earlier than its own; with a zero delay it goes to the head
 * of the list.
 */
static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *temp_mad_send_wr;
	struct list_head *list_item;
	unsigned long delay;

	expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START,
			  IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED);
	/* Move the flow-control accounting from "send" to "wait". */
	if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
	    mad_send_wr->is_solicited_fc) {
		mad_agent_priv->sol_fc_send_count--;
		mad_agent_priv->sol_fc_wait_count++;
	}

	list_del_init(&mad_send_wr->agent_list);
	delay = mad_send_wr->timeout;
	mad_send_wr->timeout += jiffies;

	if (delay) {
		/* Walk from the tail; stop at the first earlier timeout. */
		list_for_each_prev(list_item,
				   &mad_agent_priv->wait_list) {
			temp_mad_send_wr = list_entry(
				list_item,
				struct ib_mad_send_wr_private,
				agent_list);
			if (time_after(mad_send_wr->timeout,
				       temp_mad_send_wr->timeout))
				break;
		}
	} else {
		list_item = &mad_agent_priv->wait_list;
	}

	list_add(&mad_send_wr->agent_list, list_item);
}

static void handle_early_resp_state(struct ib_mad_send_wr_private
*mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
	/* is_solicited_fc is subtracted directly from the send counter. */
	mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc;
}

/*
 * handle_canceled_state - Flow-control accounting for a MAD that is being
 * canceled: decrement the send or wait counter according to the state the
 * MAD is leaving.  The list linkage is left untouched.
 */
static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
	if (mad_send_wr->is_solicited_fc) {
		if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
			mad_agent_priv->sol_fc_send_count--;
		else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
	}
}

/*
 * handle_done_state - Same accounting as handle_canceled_state(), then
 * unlink @mad_send_wr from whichever agent list it is on.
 */
static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr,
		       struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->is_solicited_fc) {
		if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
			mad_agent_priv->sol_fc_send_count--;
		else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
	}

	list_del_init(&mad_send_wr->agent_list);
}

/*
 * change_mad_state - Move @mad_send_wr to @new_state.
 *
 * Runs the per-state handler (list placement and flow-control counters)
 * and then records @new_state.  The one exception: a MAD that is already
 * CANCELED and is sent to WAIT_RESP is placed on the wait list but keeps
 * its CANCELED state.
 */
void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
			     enum ib_mad_state new_state)
{
	struct ib_mad_agent_private *mad_agent_priv =
		mad_send_wr->mad_agent_priv;

	switch (new_state) {
	case IB_MAD_STATE_INIT:
		break;
	case IB_MAD_STATE_QUEUED:
		handle_queued_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_SEND_START:
		handle_send_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_WAIT_RESP:
		handle_wait_state(mad_send_wr, mad_agent_priv);
		if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
			return;
		break;
	case IB_MAD_STATE_EARLY_RESP:
		handle_early_resp_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_CANCELED:
		handle_canceled_state(mad_send_wr, mad_agent_priv);
		break;
	case IB_MAD_STATE_DONE:
		handle_done_state(mad_send_wr, mad_agent_priv);
		break;
	}

	mad_send_wr->state = new_state;
}

/*
 * is_solicited_fc_mad - Decide whether @mad_send_wr is subject to
 * solicited-MAD flow control.  A MAD with a zero timeout never is.
 */
static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_rmpp_mad *rmpp_mad;
	u8 mgmt_class;

	if (!mad_send_wr->timeout)
		return 0;

	rmpp_mad = mad_send_wr->send_buf.mad;
	if
(mad_send_wr->mad_agent_priv->agent.rmpp_version &&
	    (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE))
		/* An RMPP-active MAD on an RMPP agent is excluded. */
		return 0;

	/* Only these four management classes are flow controlled. */
	mgmt_class =
		((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class;
	return mgmt_class == IB_MGMT_CLASS_CM ||
	       mgmt_class == IB_MGMT_CLASS_SUBN_ADM ||
	       mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
	       mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
}

/*
 * mad_is_for_backlog - Decide whether @mad_send_wr must be queued on the
 * backlog instead of being sent now.
 *
 * Never for a MAD that is not flow controlled, or when the agent's
 * sol_fc_max is 0.  Otherwise true when the backlog is already non-empty,
 * or when the in-flight count (send + wait) has reached sol_fc_max.
 */
static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_agent_private *mad_agent_priv =
		mad_send_wr->mad_agent_priv;

	if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max)
		return false;

	if (!list_empty(&mad_agent_priv->backlog_list))
		return true;

	return mad_agent_priv->sol_fc_send_count +
		       mad_agent_priv->sol_fc_wait_count >=
	       mad_agent_priv->sol_fc_max;
}

/*
 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
 *  with the registered client
 */
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
		     struct ib_mad_send_buf **bad_send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_buf *next_send_buf;
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long flags;
	int ret = -EINVAL;

	/* Walk list of send WRs and post each on send list */
	for (; send_buf; send_buf = next_send_buf) {
		mad_send_wr = container_of(send_buf,
					   struct ib_mad_send_wr_private,
					   send_buf);
		mad_agent_priv = mad_send_wr->mad_agent_priv;

		ret = ib_mad_enforce_security(mad_agent_priv,
					      mad_send_wr->send_wr.pkey_index);
		if (ret)
			goto error;

		/* A send handler is mandatory for posting. */
		if (!send_buf->mad_agent->send_handler) {
			ret = -EINVAL;
			goto error;
		}

		/* An RMPP agent may not send a class that is not RMPP capable. */
		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
			if (mad_agent_priv->agent.rmpp_version) {
				ret = -EINVAL;
				goto error;
			}
		}

		/*
		 * Save pointer to next work request to post in case the
		 * current one completes, and the user modifies the work
		 * request associated with the completion
		 */
		next_send_buf = send_buf->next;
		mad_send_wr->send_wr.ah = send_buf->ah;

		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { ret = handle_outgoing_dr_smp(mad_agent_priv, mad_send_wr); if (ret < 0) /* error */ goto error; else if (ret == 1) /* locally consumed */ continue; } mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); mad_send_wr->max_retries = send_buf->retries; mad_send_wr->retries_left = send_buf->retries; send_buf->retries = 0; change_mad_state(mad_send_wr, IB_MAD_STATE_INIT); /* Reference MAD agent until send completes */ refcount_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr); if (mad_is_for_backlog(mad_send_wr)) { change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return 0; } change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_send_rmpp_mad(mad_send_wr); if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) ret = ib_send_mad(mad_send_wr); } else ret = ib_send_mad(mad_send_wr); if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); change_mad_state(mad_send_wr, IB_MAD_STATE_DONE); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); deref_mad_agent(mad_agent_priv); goto error; } } return 0; error: if (bad_send_buf) *bad_send_buf = send_buf; return ret; } EXPORT_SYMBOL(ib_post_send_mad); /* * ib_free_recv_mad - Returns data buffers used to receive * a MAD to the access layer */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *priv; struct list_head free_list; INIT_LIST_HEAD(&free_list); list_splice_init(&mad_recv_wc->rmpp_list, &free_list); list_for_each_entry_safe(mad_recv_buf, 
temp_recv_buf,
					&free_list, list) {
		/* Walk back from the receive buffer to its enclosing allocation. */
		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
					   recv_buf);
		mad_priv_hdr = container_of(mad_recv_wc,
					    struct ib_mad_private_header,
					    recv_wc);
		priv = container_of(mad_priv_hdr, struct ib_mad_private,
				    header);
		kfree(priv);
	}
}
EXPORT_SYMBOL(ib_free_recv_mad);

/*
 * method_in_use - Returns -EINVAL (and logs) if any method set in
 * @mad_reg_req's method_mask already has an agent in *@method, else 0.
 */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
			 struct ib_mad_reg_req *mad_reg_req)
{
	int i;

	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
		if ((*method)->agent[i]) {
			pr_err("Method %d already in use\n", i);
			return -EINVAL;
		}
	}
	return 0;
}

/* Allocate a zeroed method table into *@method; 0 or -ENOMEM. */
static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
	/* Allocate management method table */
	*method = kzalloc(sizeof **method, GFP_ATOMIC);
	return (*method) ? 0 : (-ENOMEM);
}

/*
 * Check to see if there are any methods still in use
 */
static int check_method_table(struct ib_mad_mgmt_method_table *method)
{
	int i;

	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
		if (method->agent[i])
			return 1;
	return 0;
}

/*
 * Check to see if there are any method tables for this class still in use
 */
static int check_class_table(struct ib_mad_mgmt_class_table *class)
{
	int i;

	for (i = 0; i < MAX_MGMT_CLASS; i++)
		if (class->method_table[i])
			return 1;
	return 0;
}

/* Returns 1 if any OUI slot of @vendor_class still has a method table. */
static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
{
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++)
		if (vendor_class->method_table[i])
			return 1;
	return 0;
}

/*
 * find_vendor_oui - Index of the slot in @vendor_class whose 3-byte OUI
 * equals @oui, or -1 if no slot matches.
 */
static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
			   const char *oui)
{
	int i;

	for (i = 0; i < MAX_MGMT_OUI; i++)
		/* Is there matching OUI for this vendor class ? */
		if (!memcmp(vendor_class->oui[i], oui, 3))
			return i;

	return -1;
}

/* Returns 1 if @vendor still has any vendor class table. */
static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
{
	int i;

	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
		if (vendor->vendor_class[i])
			return 1;

	return 0;
}

/* Clear every slot of @method that points at @agent. */
static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
				     struct ib_mad_agent_private *agent)
{
	int i;

	/* Remove any methods for this mad agent */
	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
		if (method->agent[i] == agent)
			method->agent[i] = NULL;
}

/*
 * add_nonoui_reg_req - Register @agent_priv for the methods in
 * @mad_reg_req under class @mgmt_class (a class without an OUI).
 *
 * The class table and method table are allocated on demand.  Returns 0 on
 * success, -ENOMEM on allocation failure, or -EINVAL if a requested
 * method is already taken.
 */
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			      struct ib_mad_agent_private *agent_priv,
			      u8 mgmt_class)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_class_table **class;
	struct ib_mad_mgmt_method_table **method;
	int i, ret;

	port_priv = agent_priv->qp_info->port_priv;
	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
	if (!*class) {
		/* Allocate management class table for "new" class version */
		*class = kzalloc(sizeof **class, GFP_ATOMIC);
		if (!*class) {
			ret = -ENOMEM;
			goto error1;
		}

		/* Allocate method table for this management class */
		method = &(*class)->method_table[mgmt_class];
		if ((ret = allocate_method_table(method)))
			goto error2;
	} else {
		method = &(*class)->method_table[mgmt_class];
		if (!*method) {
			/* Allocate method table for this management class */
			if ((ret = allocate_method_table(method)))
				goto error1;
		}
	}

	/* Now, make sure methods are not already in use */
	if (method_in_use(method, mad_reg_req))
		goto error3;

	/* Finally, add in methods being registered */
	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
		(*method)->agent[i] = agent_priv;

	return 0;

error3:
	/* Remove any methods for this mad agent */
	remove_methods_mad_agent(*method, agent_priv);
	/* Now, check to see if there are any methods in use */
	if (!check_method_table(*method)) {
		/* If not, release management method table */
		kfree(*method);
		*method = NULL;
	}
	ret = -EINVAL;
	goto error1;
error2:
	/* Only reached when the class table was allocated by this call. */
	kfree(*class);
	*class = NULL;
error1:
	return ret;
}

/*
 * add_oui_reg_req - Register @agent_priv for the methods in @mad_reg_req
 * under a vendor class that is keyed by OUI.
 *
 * The vendor table and vendor class table are allocated on demand.  An
 * existing slot with the same OUI is reused; otherwise the first free
 * slot is claimed.  Returns 0 on success or a negative errno; ret starts
 * out as -ENOMEM, which is what the early failure paths return.
 */
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_vendor_class_table **vendor_table;
	/* Non-NULL only when this call allocated the respective table. */
	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
	struct ib_mad_mgmt_method_table **method;
	int i, ret = -ENOMEM;
	u8 vclass;

	/* "New" vendor (with OUI) class */
	vclass = vendor_class_index(mad_reg_req->mgmt_class);
	port_priv = agent_priv->qp_info->port_priv;
	vendor_table = &port_priv->version[
				mad_reg_req->mgmt_class_version].vendor;
	if (!*vendor_table) {
		/* Allocate mgmt vendor class table for "new" class version */
		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
		if (!vendor)
			goto error1;

		*vendor_table = vendor;
	}
	if (!(*vendor_table)->vendor_class[vclass]) {
		/* Allocate table for this management vendor class */
		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
		if (!vendor_class)
			goto error2;

		(*vendor_table)->vendor_class[vclass] = vendor_class;
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* Is there matching OUI for this vendor class ? */
		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
			    mad_reg_req->oui, 3)) {
			method = &(*vendor_table)->vendor_class[
						vclass]->method_table[i];
			if (!*method)
				goto error3;
			goto check_in_use;
		}
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* OUI slot available ? */
		if (!is_vendor_oui((*vendor_table)->vendor_class[
				vclass]->oui[i])) {
			method = &(*vendor_table)->vendor_class[
				vclass]->method_table[i];
			/* Allocate method table for this OUI */
			if (!*method) {
				ret = allocate_method_table(method);
				if (ret)
					goto error3;
			}
			/* Claim the slot by recording the OUI. */
			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
			       mad_reg_req->oui, 3);
			goto check_in_use;
		}
	}
	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
	goto error3;

check_in_use:
	/* Now, make sure methods are not already in use */
	if (method_in_use(method, mad_reg_req))
		goto error4;

	/* Finally, add in methods being registered */
	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
		(*method)->agent[i] = agent_priv;

	return 0;

error4:
	/* Remove any methods for this mad agent */
	remove_methods_mad_agent(*method, agent_priv);
	/* Now, check to see if there are any methods in use */
	if (!check_method_table(*method)) {
		/* If not, release management method table */
		kfree(*method);
		*method = NULL;
	}
	ret = -EINVAL;
error3:
	/* Undo the vendor class table only if this call allocated it. */
	if (vendor_class) {
		(*vendor_table)->vendor_class[vclass] = NULL;
		kfree(vendor_class);
	}
error2:
	/* Likewise for the vendor table. */
	if (vendor) {
		*vendor_table = NULL;
		kfree(vendor);
	}
error1:
	return ret;
}

/*
 * remove_mad_reg_req - Remove @agent_priv from the method tables named by
 * its registration request, freeing each table level that becomes empty.
 * Does nothing when the agent has no registration request.
 */
static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_method_table *method;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	int index;
	u8 mgmt_class;

	/*
	 * Was MAD registration request supplied
	 * with original registration ?
	 */
	if (!agent_priv->reg_req)
		goto out;

	port_priv = agent_priv->qp_info->port_priv;
	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
	class = port_priv->version[
			agent_priv->reg_req->mgmt_class_version].class;
	if (!class)
		goto vendor_check;

	method = class->method_table[mgmt_class];
	if (method) {
		/* Remove any methods for this mad agent */
		remove_methods_mad_agent(method, agent_priv);
		/* Now, check to see if there are any methods still in use */
		if (!check_method_table(method)) {
			/* If not, release management method table */
			kfree(method);
			class->method_table[mgmt_class] = NULL;
			/* Any management classes left ? */
			if (!check_class_table(class)) {
				/* If not, release management class table */
				kfree(class);
				port_priv->version[
					agent_priv->reg_req->
					mgmt_class_version].class = NULL;
			}
		}
	}

vendor_check:
	/* The OUI-keyed tables only apply to vendor classes. */
	if (!is_vendor_class(mgmt_class))
		goto out;

	/* normalize mgmt_class to vendor range 2 */
	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
	vendor = port_priv->version[
			agent_priv->reg_req->mgmt_class_version].vendor;

	if (!vendor)
		goto out;

	vendor_class = vendor->vendor_class[mgmt_class];
	if (vendor_class) {
		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
		if (index < 0)
			goto out;
		method = vendor_class->method_table[index];
		if (method) {
			/* Remove any methods for this mad agent */
			remove_methods_mad_agent(method, agent_priv);
			/*
			 * Now, check to see if there are
			 * any methods still in use
			 */
			if (!check_method_table(method)) {
				/* If not, release management method table */
				kfree(method);
				vendor_class->method_table[index] = NULL;
				/* Zeroing the OUI marks the slot as free again. */
				memset(vendor_class->oui[index], 0, 3);
				/* Any OUIs left ? */
				if (!check_vendor_class(vendor_class)) {
					/* If not, release vendor class table */
					kfree(vendor_class);
					vendor->vendor_class[mgmt_class] = NULL;
					/* Any other vendor classes left ? */
					if (!check_vendor_table(vendor)) {
						kfree(vendor);
						port_priv->version[
							agent_priv->reg_req->
							mgmt_class_version].
vendor = NULL;
					}
				}
			}
		}
	}

out:
	return;
}

/*
 * find_mad_agent - Find the agent that should receive the MAD described
 * by @mad_hdr on @port_priv.
 *
 * A response is routed by the upper 32 bits of its transaction ID through
 * the ib_mad_clients xarray.  Anything else is routed through the port's
 * registration tables by class version, class and method (plus OUI for
 * vendor classes), under reg_lock.
 *
 * On success the agent is returned with a reference taken on it.  NULL is
 * returned when nothing matches or when the matching agent has no receive
 * handler (in which case the reference is dropped again).
 */
static struct ib_mad_agent_private *
find_mad_agent(struct ib_mad_port_private *port_priv,
	       const struct ib_mad_hdr *mad_hdr)
{
	struct ib_mad_agent_private *mad_agent = NULL;
	unsigned long flags;

	if (ib_response_mad(mad_hdr)) {
		u32 hi_tid;

		/*
		 * Routing is based on high 32 bits of transaction ID
		 * of MAD.
		 */
		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
		rcu_read_lock();
		mad_agent = xa_load(&ib_mad_clients, hi_tid);
		/* Ignore an agent whose reference count has already hit zero. */
		if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
			mad_agent = NULL;
		rcu_read_unlock();
	} else {
		struct ib_mad_mgmt_class_table *class;
		struct ib_mad_mgmt_method_table *method;
		struct ib_mad_mgmt_vendor_class_table *vendor;
		struct ib_mad_mgmt_vendor_class *vendor_class;
		const struct ib_vendor_mad *vendor_mad;
		int index;

		spin_lock_irqsave(&port_priv->reg_lock, flags);
		/*
		 * Routing is based on version, class, and method
		 * For "newer" vendor MADs, also based on OUI
		 */
		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
			goto out;
		if (!is_vendor_class(mad_hdr->mgmt_class)) {
			class = port_priv->version[
					mad_hdr->class_version].class;
			if (!class)
				goto out;
			/* Bounds-check the converted class before indexing. */
			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
			    ARRAY_SIZE(class->method_table))
				goto out;
			method = class->method_table[convert_mgmt_class(
							mad_hdr->mgmt_class)];
			if (method)
				mad_agent = method->agent[mad_hdr->method &
							  ~IB_MGMT_METHOD_RESP];
		} else {
			vendor = port_priv->version[
					mad_hdr->class_version].vendor;
			if (!vendor)
				goto out;
			vendor_class = vendor->vendor_class[vendor_class_index(
						mad_hdr->mgmt_class)];
			if (!vendor_class)
				goto out;
			/* Find matching OUI */
			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
			index = find_vendor_oui(vendor_class, vendor_mad->oui);
			if (index == -1)
				goto out;
			method = vendor_class->method_table[index];
			if (method) {
				mad_agent = method->agent[mad_hdr->method &
							  ~IB_MGMT_METHOD_RESP];
			}
		}
		/* Take the reference while reg_lock is still held. */
		if (mad_agent)
			refcount_inc(&mad_agent->refcount);
out:
		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
	}

	if (mad_agent &&
!mad_agent->agent.recv_handler) {
		dev_notice(&port_priv->device->dev,
			   "No receive handler for client %p on port %u\n",
			   &mad_agent->agent, port_priv->port_num);
		/* Give back the reference taken during lookup. */
		deref_mad_agent(mad_agent);
		mad_agent = NULL;
	}

	return mad_agent;
}

/*
 * validate_mad - Returns 1 if the MAD described by @mad_hdr is acceptable
 * on the QP of @qp_info, 0 otherwise.
 *
 * The base version must be the IB one, or the OPA one when @opa is set.
 * The two subnet management classes are valid only on QP0; every other
 * class is valid only on a QP other than QP0.
 */
static int validate_mad(const struct ib_mad_hdr *mad_hdr,
			const struct ib_mad_qp_info *qp_info,
			bool opa)
{
	int valid = 0;
	u32 qp_num = qp_info->qp->qp_num;

	/* Make sure MAD base version is understood */
	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
		pr_err("MAD received with unsupported base version %u %s\n",
		       mad_hdr->base_version, opa ? "(opa)" : "");
		goto out;
	}

	/* Filter SMI packets sent to other than QP0 */
	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
		if (qp_num == 0)
			valid = 1;
	} else {
		/* CM attributes other than ClassPortInfo only use Send method */
		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
			goto out;
		/* Filter GSI packets sent to QP0 */
		if (qp_num != 0)
			valid = 1;
	}

out:
	return valid;
}

/*
 * is_rmpp_data_mad - Returns non-zero when any of these hold: the agent
 * has no RMPP version, the agent is not a kernel RMPP agent, the MAD's
 * RMPP ACTIVE flag is clear, or the MAD's RMPP type is DATA.
 */
static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
			    const struct ib_mad_hdr *mad_hdr)
{
	struct ib_rmpp_mad *rmpp_mad;

	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
	return !mad_agent_priv->agent.rmpp_version ||
		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
}

/* Non-zero when the sent MAD and the received MAD have the same class. */
static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
				     const struct ib_mad_recv_wc *rwc)
{
	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
		rwc->recv_buf.mad->mad_hdr.mgmt_class;
}

/*
 * rcv_has_same_gid - Non-zero when the received MAD @rwc appears to come
 * from the peer that the sent MAD @wr was addressed to.  Every case that
 * cannot be decided returns 0.
 */
static inline int
rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
		 const struct ib_mad_send_wr_private *wr,
		 const struct ib_mad_recv_wc *rwc)
{
	struct
rdma_ah_attr attr;
	u8 send_resp, rcv_resp;
	union ib_gid sgid;
	struct ib_device *device = mad_agent_priv->agent.device;
	u32 port_num = mad_agent_priv->agent.port_num;
	u8 lmc;
	bool has_grh;

	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);

	if (send_resp == rcv_resp)
		/* both requests, or both responses. GIDs different */
		return 0;

	if (rdma_query_ah(wr->send_buf.ah, &attr))
		/* Assume not equal, to avoid false positives. */
		return 0;

	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
		/* one has GID, other does not.  Assume different */
		return 0;

	if (!send_resp && rcv_resp) {
		/* is request/response. */
		if (!has_grh) {
			if (ib_get_cached_lmc(device, port_num, &lmc))
				return 0;
			/*
			 * With no LMC there is nothing to compare; otherwise
			 * the low lmc path bits of both sides must be equal.
			 */
			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
					   rwc->wc->dlid_path_bits) &
					  ((1 << lmc) - 1)));
		} else {
			const struct ib_global_route *grh =
					rdma_ah_read_grh(&attr);

			/* Our source GID must be the response's destination */
			if (rdma_query_gid(device, port_num,
					   grh->sgid_index, &sgid))
				return 0;
			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
				       16);
		}
	}

	/* Remaining case: the send is a response and the receive a request */
	if (!has_grh)
		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
	else
		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
			       rwc->recv_buf.grh->sgid.raw,
			       16);
}

/* Returns non-zero when @class is the directed-route subnet management class */
static inline int is_direct(u8 class)
{
	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
}

/*
 * Find the send that the received MAD @wc answers.
 *
 * The agent's wait_list, backlog_list and send_list are searched in that
 * order for an entry with the same transaction ID and management class
 * and, unless the MAD is direct routed, matching addressing.  Entries on
 * send_list must additionally satisfy is_rmpp_data_mad() and have a
 * non-zero timeout.
 *
 * Returns the matching send, or NULL when nothing matches or the first
 * match is in IB_MAD_STATE_CANCELED.  The lists are walked without taking
 * any lock here; ib_mad_complete_recv() calls this with
 * mad_agent_priv->lock held.
 */
struct ib_mad_send_wr_private*
ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
		 const struct ib_mad_recv_wc *wc)
{
	struct ib_mad_send_wr_private *wr;
	const struct ib_mad_hdr *mad_hdr;

	mad_hdr = &wc->recv_buf.mad->mad_hdr;

	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
		if ((wr->tid == mad_hdr->tid) &&
		    rcv_has_same_class(wr, wc) &&
		    /*
		     * Don't check GID for direct routed MADs.
		     * These might have permissive LIDs.
		     */
		    (is_direct(mad_hdr->mgmt_class) ||
		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
			return (wr->state != IB_MAD_STATE_CANCELED) ?
wr : NULL;
	}

	list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) {
		if ((wr->tid == mad_hdr->tid) &&
		    rcv_has_same_class(wr, wc) &&
		    /*
		     * Don't check GID for direct routed MADs.
		     * These might have permissive LIDs.
		     */
		    (is_direct(mad_hdr->mgmt_class) ||
		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
			return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
	}

	/*
	 * It's possible to receive the response before we've
	 * been notified that the send has completed
	 */
	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
		    wr->tid == mad_hdr->tid &&
		    wr->timeout &&
		    rcv_has_same_class(wr, wc) &&
		    /*
		     * Don't check GID for direct routed MADs.
		     * These might have permissive LIDs.
		     */
		    (is_direct(mad_hdr->mgmt_class) ||
		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
			/* Verify request has not been canceled */
			return (wr->state != IB_MAD_STATE_CANCELED) ?
				wr : NULL;
	}
	return NULL;
}

/*
 * Send MADs from the agent's backlog_list for as long as the list is not
 * empty and sol_fc_send_count + sol_fc_wait_count stays below sol_fc_max.
 *
 * Each entry is moved to IB_MAD_STATE_SEND_START under the agent lock; the
 * lock is dropped around ib_send_mad().  If the send fails, the entry is
 * moved to IB_MAD_STATE_DONE and the client's send_handler is invoked with
 * status IB_WC_LOC_QP_OP_ERR.
 */
static void
process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_send_wc mad_send_wc = {};
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	while (!list_empty(&mad_agent_priv->backlog_list) &&
	       (mad_agent_priv->sol_fc_send_count +
			mad_agent_priv->sol_fc_wait_count <
		mad_agent_priv->sol_fc_max)) {
		mad_send_wr = list_entry(mad_agent_priv->backlog_list.next,
					 struct ib_mad_send_wr_private,
					 agent_list);
		change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
		/* ib_send_mad() is called without the agent lock held */
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
		ret = ib_send_mad(mad_send_wr);
		if (ret) {
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			deref_mad_agent(mad_agent_priv);
			change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			/* Report the failed send to the client */
			mad_send_wc.send_buf = &mad_send_wr->send_buf;
			mad_send_wc.status = IB_WC_LOC_QP_OP_ERR;
			mad_agent_priv->agent.send_handler(
				&mad_agent_priv->agent, &mad_send_wc);
		}

		/* Re-take the lock before re-evaluating the loop condition */
		spin_lock_irqsave(&mad_agent_priv->lock,
flags);
	}

	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}

/*
 * Record that the response for @mad_send_wr has arrived: clear its timeout
 * and move it to IB_MAD_STATE_DONE if it was in IB_MAD_STATE_WAIT_RESP or
 * IB_MAD_STATE_QUEUED, otherwise to IB_MAD_STATE_EARLY_RESP.
 */
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
{
	mad_send_wr->timeout = 0;
	if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP ||
	    mad_send_wr->state == IB_MAD_STATE_QUEUED)
		change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
	else
		change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP);
}

/*
 * Deliver a received MAD to @mad_agent_priv.
 *
 * The MAD is first checked with ib_mad_enforce_security() and, for kernel
 * RMPP agents, passed through ib_process_rmpp_recv_wc().  A response MAD
 * is matched against the agent's outstanding sends; the client's
 * recv_handler is called and, if the matched send reached
 * IB_MAD_STATE_DONE, the send is completed afterwards.  Non-response MADs
 * go straight to recv_handler.
 *
 * Every path through this function calls deref_mad_agent() on
 * @mad_agent_priv exactly once.
 */
static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
				 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_send_wc mad_send_wc;
	unsigned long flags;
	bool is_mad_done;
	int ret;

	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
	ret = ib_mad_enforce_security(mad_agent_priv,
				      mad_recv_wc->wc->pkey_index);
	if (ret) {
		ib_free_recv_mad(mad_recv_wc);
		deref_mad_agent(mad_agent_priv);
		return;
	}

	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
						      mad_recv_wc);
		/* NULL result: nothing to hand to the client at this point */
		if (!mad_recv_wc) {
			deref_mad_agent(mad_agent_priv);
			return;
		}
	}

	/* Complete corresponding request */
	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
		if (!mad_send_wr) {
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
				/* user rmpp is in effect
				 * and this is an active RMPP MAD
				 */
				mad_agent_priv->agent.recv_handler(
						&mad_agent_priv->agent, NULL,
						mad_recv_wc);
				deref_mad_agent(mad_agent_priv);
			} else {
				/* not user rmpp, revert to normal behavior and
				 * drop the mad
				 */
				ib_free_recv_mad(mad_recv_wc);
				deref_mad_agent(mad_agent_priv);
				return;
			}
		} else {
			ib_mark_mad_done(mad_send_wr);
			/* Sample the state while the lock is still held */
			is_mad_done = (mad_send_wr->state ==
IB_MAD_STATE_DONE);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

			/* Defined behavior is to complete response before request */
			mad_agent_priv->agent.recv_handler(
					&mad_agent_priv->agent,
					&mad_send_wr->send_buf,
					mad_recv_wc);
			deref_mad_agent(mad_agent_priv);

			/* Only now complete the request that was answered */
			if (is_mad_done) {
				mad_send_wc.status = IB_WC_SUCCESS;
				mad_send_wc.vendor_err = 0;
				mad_send_wc.send_buf = &mad_send_wr->send_buf;
				ib_mad_complete_send_wr(mad_send_wr,
							&mad_send_wc);
			}
		}
	} else {
		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
						   mad_recv_wc);
		deref_mad_agent(mad_agent_priv);
	}
}

/*
 * Process a received IB SMP (@recv->mad viewed as struct ib_smp).
 *
 * Returns IB_SMI_DISCARD when one of the SMI checks rejects the packet, or
 * after the packet has been forwarded on a switch (a copy is built in
 * @response and sent with agent_send_response()).  Returns IB_SMI_HANDLE
 * when the caller should continue processing the MAD.
 */
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
				     const struct ib_mad_qp_info *qp_info,
				     const struct ib_wc *wc,
				     u32 port_num,
				     struct ib_mad_private *recv,
				     struct ib_mad_private *response)
{
	enum smi_forward_action retsmi;
	struct ib_smp *smp = (struct ib_smp *)recv->mad;

	trace_ib_mad_handle_ib_smi(smp);

	if (smi_handle_dr_smp_recv(smp,
				   rdma_cap_ib_switch(port_priv->device),
				   port_num,
				   port_priv->device->phys_port_cnt) ==
				   IB_SMI_DISCARD)
		return IB_SMI_DISCARD;

	retsmi = smi_check_forward_dr_smp(smp);
	if (retsmi == IB_SMI_LOCAL)
		return IB_SMI_HANDLE;

	if (retsmi == IB_SMI_SEND) { /* don't forward */
		if (smi_handle_dr_smp_send(smp,
					   rdma_cap_ib_switch(port_priv->device),
					   port_num) == IB_SMI_DISCARD)
			return IB_SMI_DISCARD;

		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
			return IB_SMI_DISCARD;
	} else if (rdma_cap_ib_switch(port_priv->device)) {
		/* forward case for switches */
		memcpy(response, recv, mad_priv_size(response));
		/* Re-point the copied header at response's own buffers */
		response->header.recv_wc.wc = &response->header.wc;
		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
		response->header.recv_wc.recv_buf.grh = &response->grh;

		agent_send_response((const struct ib_mad_hdr *)response->mad,
				    &response->grh, wc,
				    port_priv->device,
				    smi_get_fwd_port(smp),
				    qp_info->qp->qp_num,
				    response->mad_size,
				    false);

		return IB_SMI_DISCARD;
	}
	return IB_SMI_HANDLE;
}

/*
 * Build an "unsupported method/attribute" reply for a Get or Set request
 * that no agent claimed.
 *
 * For those two methods @recv is copied into @response, the method is set
 * to IB_MGMT_METHOD_GET_RESP and the status to
 * IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB (with IB_SMP_DIRECTION
 * or-ed in for directed-route MADs), and true is returned.  *@resp_len is
 * written only for OPA base-version MADs when @opa is true.  Any other
 * method returns false and leaves @response untouched.
 */
static bool generate_unmatched_resp(const
struct ib_mad_private *recv,
				    struct ib_mad_private *response,
				    size_t *resp_len, bool opa)
{
	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;

	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
	    recv_hdr->method == IB_MGMT_METHOD_SET) {
		memcpy(response, recv, mad_priv_size(response));
		/* Re-point the copied header at response's own buffers */
		response->header.recv_wc.wc = &response->header.wc;
		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
		response->header.recv_wc.recv_buf.grh = &response->grh;
		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
			resp_hdr->status |= IB_SMP_DIRECTION;

		/* The reply length is only adjusted for OPA MADs */
		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
			if (recv_hdr->mgmt_class ==
			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
			    recv_hdr->mgmt_class ==
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
				*resp_len = opa_get_smp_header_size(
							(struct opa_smp *)recv->mad);
			else
				*resp_len = sizeof(struct ib_mad_hdr);
		}

		return true;
	} else {
		return false;
	}
}

/*
 * OPA counterpart of handle_ib_smi(): the same sequence of checks, using
 * the opa_smi_* helpers on @recv->mad viewed as struct opa_smp.
 *
 * Returns IB_SMI_DISCARD when a check rejects the packet or after it has
 * been forwarded on a switch; IB_SMI_HANDLE when the caller should
 * continue processing the MAD.
 */
static enum smi_action
handle_opa_smi(struct ib_mad_port_private *port_priv,
	       struct ib_mad_qp_info *qp_info,
	       struct ib_wc *wc,
	       u32 port_num,
	       struct ib_mad_private *recv,
	       struct ib_mad_private *response)
{
	enum smi_forward_action retsmi;
	struct opa_smp *smp = (struct opa_smp *)recv->mad;

	trace_ib_mad_handle_opa_smi(smp);

	if (opa_smi_handle_dr_smp_recv(smp,
				   rdma_cap_ib_switch(port_priv->device),
				   port_num,
				   port_priv->device->phys_port_cnt) ==
				   IB_SMI_DISCARD)
		return IB_SMI_DISCARD;

	retsmi = opa_smi_check_forward_dr_smp(smp);
	if (retsmi == IB_SMI_LOCAL)
		return IB_SMI_HANDLE;

	if (retsmi == IB_SMI_SEND) { /* don't forward */
		if (opa_smi_handle_dr_smp_send(smp,
					   rdma_cap_ib_switch(port_priv->device),
					   port_num) == IB_SMI_DISCARD)
			return IB_SMI_DISCARD;

		if (opa_smi_check_local_smp(smp, port_priv->device) ==
		    IB_SMI_DISCARD)
			return IB_SMI_DISCARD;

	} else if
(rdma_cap_ib_switch(port_priv->device)) {
		/* forward case for switches */
		memcpy(response, recv, mad_priv_size(response));
		/* Re-point the copied header at response's own buffers */
		response->header.recv_wc.wc = &response->header.wc;
		response->header.recv_wc.recv_buf.opa_mad =
				(struct opa_mad *)response->mad;
		response->header.recv_wc.recv_buf.grh = &response->grh;

		/* Unlike the IB path, the length comes from the received wc */
		agent_send_response((const struct ib_mad_hdr *)response->mad,
				    &response->grh, wc,
				    port_priv->device,
				    opa_smi_get_fwd_port(smp),
				    qp_info->qp->qp_num,
				    recv->header.wc.byte_len,
				    true);

		return IB_SMI_DISCARD;
	}

	return IB_SMI_HANDLE;
}

/*
 * Dispatch a received SMP to handle_opa_smi() when @opa is true and the
 * MAD has the OPA base version and OPA SM class version; otherwise to
 * handle_ib_smi().  Returns whatever the chosen handler returns.
 */
static enum smi_action
handle_smi(struct ib_mad_port_private *port_priv,
	   struct ib_mad_qp_info *qp_info,
	   struct ib_wc *wc,
	   u32 port_num,
	   struct ib_mad_private *recv,
	   struct ib_mad_private *response,
	   bool opa)
{
	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;

	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
				      response);

	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
}

/*
 * Completion callback for a receive work request; it is installed as
 * cqe.done by ib_mad_post_receive_mads().
 *
 * Recovers the receive buffer from @wc->wr_cqe, validates the MAD, lets
 * SMI handling and the driver's process_mad hook look at it, and then
 * hands it to the matching agent or generates an "unmatched" reply.  On
 * every path that gets past the early returns, a buffer is given back to
 * ib_mad_post_receive_mads().
 */
static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_mad_port_private *port_priv = cq->cq_context;
	struct ib_mad_list_head *mad_list =
		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
	struct ib_mad_qp_info *qp_info;
	struct ib_mad_private_header *mad_priv_hdr;
	struct ib_mad_private *recv, *response = NULL;
	struct ib_mad_agent_private *mad_agent;
	u32 port_num;
	int ret = IB_MAD_RESULT_SUCCESS;
	size_t mad_size;
	u16 resp_mad_pkey_index = 0;
	bool opa;

	/* Nothing to do if the port is not on the port list */
	if (list_empty_careful(&port_priv->port_list))
		return;

	if (wc->status != IB_WC_SUCCESS) {
		/*
		 * Receive errors indicate that the QP has entered the error
		 * state - error handling/shutdown code will cleanup
		 */
		return;
	}

	qp_info = mad_list->mad_queue->qp_info;
	dequeue_mad(mad_list);

	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
			       qp_info->port_priv->port_num);

	/* Walk back from the list head to the enclosing ib_mad_private */
	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
				    mad_list);
	recv =
container_of(mad_priv_hdr, struct ib_mad_private, header);
	ib_dma_unmap_single(port_priv->device,
			    recv->header.mapping,
			    mad_priv_dma_size(recv),
			    DMA_FROM_DEVICE);

	/* Setup MAD receive work completion from "normal" work completion */
	recv->header.wc = *wc;
	recv->header.recv_wc.wc = &recv->header.wc;

	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
		/* OPA: length is taken from the completion, minus the GRH */
		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
	} else {
		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
	}

	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
	recv->header.recv_wc.recv_buf.grh = &recv->grh;

	/* Validate MAD */
	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
		goto out;

	trace_ib_mad_recv_done_handler(qp_info, wc,
				       (struct ib_mad_hdr *)recv->mad);

	/* Scratch buffer used for any reply generated below */
	mad_size = recv->mad_size;
	response = alloc_mad_private(mad_size, GFP_KERNEL);
	if (!response)
		goto out;

	/* On a switch the port comes from the completion itself */
	if (rdma_cap_ib_switch(port_priv->device))
		port_num = wc->port_num;
	else
		port_num = port_priv->port_num;

	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
			       response, opa)
		    == IB_SMI_DISCARD)
			goto out;
	}

	/* Give driver "right of first refusal" on incoming MAD */
	if (port_priv->device->ops.process_mad) {
		ret = port_priv->device->ops.process_mad(
			port_priv->device, 0, port_priv->port_num, wc,
			&recv->grh, (const struct ib_mad *)recv->mad,
			(struct ib_mad *)response->mad, &mad_size,
			&resp_mad_pkey_index);

		/* For OPA the driver-supplied pkey index is put into wc */
		if (opa)
			wc->pkey_index = resp_mad_pkey_index;

		if (ret & IB_MAD_RESULT_SUCCESS) {
			if (ret & IB_MAD_RESULT_CONSUMED)
				goto out;
			if (ret & IB_MAD_RESULT_REPLY) {
				agent_send_response((const struct ib_mad_hdr *)response->mad,
						    &recv->grh, wc,
						    port_priv->device,
						    port_num,
						    qp_info->qp->qp_num,
						    mad_size, opa);
				goto out;
			}
		}
	}

	mad_agent = find_mad_agent(port_priv, (const
struct ib_mad_hdr *)recv->mad);
	if (mad_agent) {
		trace_ib_mad_recv_done_agent(mad_agent);
		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
		/*
		 * recv is freed up in error cases in ib_mad_complete_recv
		 * or via recv_handler in ib_mad_complete_recv()
		 */
		recv = NULL;
	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
		agent_send_response((const struct ib_mad_hdr *)response->mad,
				    &recv->grh, wc,
				    port_priv->device, port_num,
				    qp_info->qp->qp_num, mad_size, opa);
	}

out:
	/* Post another receive request for this QP */
	if (response) {
		/* The response buffer is reposted; recv (may be NULL) is freed */
		ib_mad_post_receive_mads(qp_info, response);
		kfree(recv);
	} else
		ib_mad_post_receive_mads(qp_info, recv);
}

/*
 * Re-arm or cancel the agent's timed_work after its wait_list changed.
 *
 * With an empty wait_list the delayed work is cancelled.  Otherwise, if
 * the first entry's timeout is earlier than mad_agent_priv->timeout, the
 * agent timeout is updated and the work is rescheduled for that entry
 * (at least one jiffy from now).
 */
static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long delay;

	if (list_empty(&mad_agent_priv->wait_list)) {
		cancel_delayed_work(&mad_agent_priv->timed_work);
	} else {
		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
					 struct ib_mad_send_wr_private,
					 agent_list);

		if (time_after(mad_agent_priv->timeout,
			       mad_send_wr->timeout)) {
			mad_agent_priv->timeout = mad_send_wr->timeout;
			delay = mad_send_wr->timeout - jiffies;
			/* Already expired: fire on the next jiffy */
			if ((long)delay <= 0)
				delay = 1;
			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
					 &mad_agent_priv->timed_work, delay);
		}
	}
}

/*
 * Move @mad_send_wr to IB_MAD_STATE_WAIT_RESP and, if it is then the first
 * entry of the agent's wait_list, reschedule timed_work using the value
 * mad_send_wr->timeout held on entry as the delay.
 */
static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
{
	struct ib_mad_agent_private *mad_agent_priv;
	unsigned long delay;

	mad_agent_priv = mad_send_wr->mad_agent_priv;
	/* Read the timeout before the state change below */
	delay = mad_send_wr->timeout;
	change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP);

	/* Reschedule a work item if we have a shorter timeout */
	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
				 &mad_agent_priv->timed_work, delay);
}

/*
 * Set the timeout of @mad_send_wr to @timeout_ms (converted to jiffies)
 * and put the request into the wait-for-response state.
 */
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
			  unsigned long timeout_ms)
{
	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
wait_for_response(mad_send_wr);
}

/*
 * Process a send work completion
 *
 * For kernel RMPP agents the completion is first offered to
 * ib_process_rmpp_send_wc(); if that consumes it, nothing else happens.
 * A cancelled send is reported with IB_WC_WR_FLUSH_ERR.  A send still in
 * IB_MAD_STATE_SEND_START with a non-zero timeout is moved to the
 * wait-for-response state instead of being completed.  Otherwise the send
 * is marked IB_MAD_STATE_DONE, the agent timer is adjusted, the completion
 * is reported (to ib_rmpp_send_handler() or to the client's send_handler)
 * and deref_mad_agent() is called on the agent.
 */
void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
			     struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_mad_agent_private	*mad_agent_priv;
	unsigned long			flags;
	int				ret;

	mad_agent_priv = mad_send_wr->mad_agent_priv;
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
		if (ret == IB_RMPP_RESULT_CONSUMED)
			goto done;
	} else
		ret = IB_RMPP_RESULT_UNHANDLED;

	if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
		mad_send_wc->status = IB_WC_WR_FLUSH_ERR;
	else if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
		 mad_send_wr->timeout) {
		/* A response is still expected: wait instead of completing */
		wait_for_response(mad_send_wr);
		goto done;
	}

	/* Remove send from MAD agent and notify client of completion */
	if (mad_send_wr->state != IB_MAD_STATE_DONE)
		change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
	adjust_timeout(mad_agent_priv);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

	if (ret == IB_RMPP_RESULT_INTERNAL) {
		ib_rmpp_send_handler(mad_send_wc);
	} else {
		/* Backlogged sends are processed before the handler is called */
		if (mad_send_wr->is_solicited_fc)
			process_backlog_mads(mad_agent_priv);
		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
						   mad_send_wc);
	}

	/* Release reference on agent taken when sending */
	deref_mad_agent(mad_agent_priv);
	return;
done:
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}

/*
 * Completion callback for a send work request.
 *
 * Unmaps the send's DMA buffers, removes it from the send queue, moves
 * one entry from the QP's overflow_list onto the send queue when the
 * queue count was above max_active, completes the send, and then posts
 * the moved entry.  If posting that entry fails, it is completed in turn
 * with status IB_WC_LOC_QP_OP_ERR via the retry label.
 */
static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_mad_port_private *port_priv = cq->cq_context;
	struct ib_mad_list_head *mad_list =
		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
	struct ib_mad_qp_info		*qp_info;
	struct ib_mad_queue		*send_queue;
	struct ib_mad_send_wc		mad_send_wc;
	unsigned long flags;
	int ret;

	/* Nothing to do if the port is not on the port list */
	if (list_empty_careful(&port_priv->port_list))
		return;

	if (wc->status != IB_WC_SUCCESS) {
		/* false means the error path reposted the send itself */
		if (!ib_mad_send_error(port_priv, wc))
			return;
	}

	mad_send_wr = container_of(mad_list, struct
ib_mad_send_wr_private,
				   mad_list);
	send_queue = mad_list->mad_queue;
	qp_info = send_queue->qp_info;

	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
	trace_ib_mad_send_done_handler(mad_send_wr, wc);

retry:
	/* sg_list[0] is unmapped as the header, sg_list[1] as the payload */
	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
			    mad_send_wr->header_mapping,
			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
			    mad_send_wr->payload_mapping,
			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
	queued_send_wr = NULL;
	spin_lock_irqsave(&send_queue->lock, flags);
	list_del(&mad_list->list);

	/* Move queued send to the send queue */
	if (send_queue->count-- > send_queue->max_active) {
		mad_list = container_of(qp_info->overflow_list.next,
					struct ib_mad_list_head, list);
		queued_send_wr = container_of(mad_list,
					struct ib_mad_send_wr_private,
					mad_list);
		list_move_tail(&mad_list->list, &send_queue->list);
	}
	spin_unlock_irqrestore(&send_queue->lock, flags);

	mad_send_wc.send_buf = &mad_send_wr->send_buf;
	mad_send_wc.status = wc->status;
	mad_send_wc.vendor_err = wc->vendor_err;
	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);

	if (queued_send_wr) {
		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
				   NULL);
		if (ret) {
			dev_err(&port_priv->device->dev,
				"ib_post_send failed: %d\n", ret);
			/* Complete the entry that could not be posted */
			mad_send_wr = queued_send_wr;
			wc->status = IB_WC_LOC_QP_OP_ERR;
			goto retry;
		}
	}
}

/*
 * Set the retry flag on every send currently on @qp_info's send queue,
 * under the send queue lock.
 */
static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_list_head *mad_list;
	unsigned long flags;

	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
		mad_send_wr = container_of(mad_list,
					   struct ib_mad_send_wr_private,
					   mad_list);
		mad_send_wr->retry = 1;
	}
	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
}

/*
 * Handle a send completion whose status is not IB_WC_SUCCESS.
 *
 * Returns false when the send was successfully reposted, in which case
 * the caller must not complete it.  Returns true when the caller should
 * go on and complete the send with the error status.
 */
static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
		struct ib_wc *wc)
{
	struct ib_mad_list_head
*mad_list =
		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
	struct ib_mad_send_wr_private *mad_send_wr;
	int ret;

	/*
	 * Send errors will transition the QP to SQE - move
	 * QP to RTS and repost flushed work requests
	 */
	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
				   mad_list);
	if (wc->status == IB_WC_WR_FLUSH_ERR) {
		if (mad_send_wr->retry) {
			/* Repost send */
			mad_send_wr->retry = 0;
			trace_ib_mad_error_handler(mad_send_wr, qp_info);
			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
					   NULL);
			/* Reposted: tell the caller not to complete it */
			if (!ret)
				return false;
		}
	} else {
		struct ib_qp_attr *attr;

		/* Transition QP to RTS and fail offending send */
		attr = kmalloc(sizeof *attr, GFP_KERNEL);
		if (attr) {
			attr->qp_state = IB_QPS_RTS;
			attr->cur_qp_state = IB_QPS_SQE;
			ret = ib_modify_qp(qp_info->qp, attr,
					   IB_QP_STATE | IB_QP_CUR_STATE);
			kfree(attr);
			if (ret)
				dev_err(&port_priv->device->dev,
					"%s - ib_modify_qp to RTS: %d\n",
					__func__, ret);
			else
				mark_sends_for_retry(qp_info);
		}
	}

	return true;
}

/*
 * Report every send on @list to the client's send_handler with status
 * @wc_status (vendor_err 0), calling deref_mad_agent() on
 * @mad_agent_priv once per entry.  Entries are not unlinked from @list
 * here.
 */
static void clear_mad_error_list(struct list_head *list,
				 enum ib_wc_status wc_status,
				 struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr, *n;
	struct ib_mad_send_wc mad_send_wc;

	mad_send_wc.status = wc_status;
	mad_send_wc.vendor_err = 0;

	list_for_each_entry_safe(mad_send_wr, n, list, agent_list) {
		mad_send_wc.send_buf = &mad_send_wr->send_buf;
		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
						   &mad_send_wc);
		deref_mad_agent(mad_agent_priv);
	}
}

/*
 * Cancel all outstanding sends of an agent.
 *
 * Under the agent lock, entries on send_list are moved to
 * IB_MAD_STATE_CANCELED, and entries on wait_list and backlog_list are
 * moved to IB_MAD_STATE_DONE and collected on a local list.  After the
 * lock is dropped the collected entries are reported with
 * IB_WC_WR_FLUSH_ERR.
 */
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
{
	unsigned long flags;
	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
	struct list_head cancel_list;

	INIT_LIST_HEAD(&cancel_list);

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
				 &mad_agent_priv->send_list, agent_list)
		change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);

	/* Empty wait & backlog list to prevent
receives from finding request */
	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
				 &mad_agent_priv->wait_list, agent_list) {
		change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
		list_add_tail(&mad_send_wr->agent_list, &cancel_list);
	}

	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
				 &mad_agent_priv->backlog_list, agent_list) {
		change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
		list_add_tail(&mad_send_wr->agent_list, &cancel_list);
	}
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

	/* Report all cancelled requests */
	clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}

/*
 * Find the send request that owns @send_buf.
 *
 * wait_list, send_list and backlog_list are searched in that order;
 * entries on send_list are considered only if is_rmpp_data_mad() is true
 * for them.  Returns the request or NULL.  No lock is taken here;
 * ib_modify_mad() calls this with mad_agent_priv->lock held.
 */
static struct ib_mad_send_wr_private*
find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
	     struct ib_mad_send_buf *send_buf)
{
	struct ib_mad_send_wr_private *mad_send_wr;

	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
			    agent_list) {
		if (&mad_send_wr->send_buf == send_buf)
			return mad_send_wr;
	}

	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
			    agent_list) {
		if (is_rmpp_data_mad(mad_agent_priv,
				     mad_send_wr->send_buf.mad) &&
		    &mad_send_wr->send_buf == send_buf)
			return mad_send_wr;
	}

	list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list,
			    agent_list) {
		if (&mad_send_wr->send_buf == send_buf)
			return mad_send_wr;
	}

	return NULL;
}

/*
 * Change the timeout of an outstanding send, or cancel it when
 * @timeout_ms is 0.
 *
 * Returns -EINVAL if @send_buf is NULL, if no matching send is found, or
 * if the send is already in IB_MAD_STATE_CANCELED; 0 otherwise.
 */
int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long flags;
	int active;

	if (!send_buf)
		return -EINVAL;

	mad_agent_priv = container_of(send_buf->mad_agent,
				      struct ib_mad_agent_private, agent);
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
	if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) {
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
		return -EINVAL;
	}

	/*
	 * "active" sends only get their timeout value updated below; the
	 * others go through ib_reset_mad_timeout().  The state is sampled
	 * before any state change made further down.
	 */
	active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) ||
		  (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) ||
		  (mad_send_wr->state == IB_MAD_STATE_QUEUED &&
timeout_ms));

	/* A zero timeout cancels the request */
	if (!timeout_ms)
		change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);

	mad_send_wr->send_buf.timeout_ms = timeout_ms;
	if (active)
		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
	else
		ib_reset_mad_timeout(mad_send_wr, timeout_ms);

	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	return 0;
}
EXPORT_SYMBOL(ib_modify_mad);

/*
 * Work handler for mad_agent_priv->local_work.
 *
 * Drains the agent's local_list.  For each entry that carries a received
 * MAD (local->mad_priv), a work completion is built with build_smp_wc()
 * and handed to the receiving agent's recv_handler; then the sender's
 * send_handler is called with IB_WC_SUCCESS.  The agent lock is dropped
 * while the handlers run and is held when the loop condition is checked.
 */
static void local_completions(struct work_struct *work)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_local_private *local;
	struct ib_mad_agent_private *recv_mad_agent;
	unsigned long flags;
	int free_mad;
	struct ib_wc wc;
	struct ib_mad_send_wc mad_send_wc;
	bool opa;

	mad_agent_priv =
		container_of(work, struct ib_mad_agent_private, local_work);

	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
			       mad_agent_priv->qp_info->port_priv->port_num);

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	while (!list_empty(&mad_agent_priv->local_list)) {
		local = list_entry(mad_agent_priv->local_list.next,
				   struct ib_mad_local_private,
				   completion_list);
		list_del(&local->completion_list);
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
		free_mad = 0;
		if (local->mad_priv) {
			u8 base_version;
			recv_mad_agent = local->recv_mad_agent;
			if (!recv_mad_agent) {
				dev_err(&mad_agent_priv->agent.device->dev,
					"No receive MAD agent for local completion\n");
				/* Nobody will consume mad_priv: free it below */
				free_mad = 1;
				goto local_send_completion;
			}

			/*
			 * Defined behavior is to complete response
			 * before request
			 */
			build_smp_wc(recv_mad_agent->agent.qp,
				     local->mad_send_wr->send_wr.wr.wr_cqe,
				     be16_to_cpu(IB_LID_PERMISSIVE),
				     local->mad_send_wr->send_wr.pkey_index,
				     recv_mad_agent->agent.port_num, &wc);

			local->mad_priv->header.recv_wc.wc = &wc;

			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
			} else {
				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
			}

			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
				 &local->mad_priv->header.recv_wc.rmpp_list);
			/* The recv_wc handed to the client has no GRH */
			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
			local->mad_priv->header.recv_wc.recv_buf.mad =
						(struct ib_mad *)local->mad_priv->mad;
			recv_mad_agent->agent.recv_handler(
						&recv_mad_agent->agent,
						&local->mad_send_wr->send_buf,
						&local->mad_priv->header.recv_wc);
			spin_lock_irqsave(&recv_mad_agent->lock, flags);
			deref_mad_agent(recv_mad_agent);
			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
		}

local_send_completion:
		/* Complete send */
		mad_send_wc.status = IB_WC_SUCCESS;
		mad_send_wc.vendor_err = 0;
		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
						   &mad_send_wc);

		/* Lock is held again for the next loop-condition check */
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
		deref_mad_agent(mad_agent_priv);
		if (free_mad)
			kfree(local->mad_priv);
		kfree(local);
	}
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}

/*
 * Try to resend a request whose response timed out.
 *
 * Returns -ETIMEDOUT when no retries are left.  Otherwise one retry is
 * consumed and the timeout is re-armed from send_buf.timeout_ms.  A
 * solicited flow-controlled send is moved to IB_MAD_STATE_QUEUED (return
 * 0) while the agent's backlog_list is not empty.  In all other cases the
 * MAD is resent (through ib_retry_rmpp() first for kernel RMPP agents)
 * and, on success, moved to IB_MAD_STATE_SEND_START.
 *
 * Returns 0 on success or a negative error code.
 */
static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret;

	if (!mad_send_wr->retries_left)
		return -ETIMEDOUT;

	mad_send_wr->retries_left--;
	mad_send_wr->send_buf.retries++;

	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
	if (mad_send_wr->is_solicited_fc &&
	    !list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) {
		change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
		return 0;
	}

	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
		ret = ib_retry_rmpp(mad_send_wr);
		switch (ret) {
		case IB_RMPP_RESULT_UNHANDLED:
			/* Not handled by RMPP: resend as a plain MAD */
			ret = ib_send_mad(mad_send_wr);
			break;
		case IB_RMPP_RESULT_CONSUMED:
			ret = 0;
			break;
		default:
			ret = -ECOMM;
			break;
		}
	} else
		ret = ib_send_mad(mad_send_wr);

	if (!ret)
		change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);

	return ret;
}

/*
 * Work handler for the agent's timed_work.
 *
 * Walks wait_list from the front.  The first entry whose timeout is still
 * in the future re-queues the work for that time and stops the walk.
 * Expired entries are either retried (retry_send()) or moved to
 * IB_MAD_STATE_DONE and collected for reporting: cancelled ones with
 * IB_WC_WR_FLUSH_ERR, the rest with IB_WC_RESP_TIMEOUT_ERR.
 */
static void timeout_sends(struct work_struct *work)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_agent_private
*mad_agent_priv;
	struct list_head timeout_list;
	struct list_head cancel_list;
	struct list_head *list_item;
	unsigned long flags, delay;

	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
				      timed_work.work);
	INIT_LIST_HEAD(&timeout_list);
	INIT_LIST_HEAD(&cancel_list);

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	while (!list_empty(&mad_agent_priv->wait_list)) {
		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
					 struct ib_mad_send_wr_private,
					 agent_list);

		/* First entry not yet expired: re-arm the work and stop */
		if (time_after(mad_send_wr->timeout, jiffies)) {
			delay = mad_send_wr->timeout - jiffies;
			if ((long)delay <= 0)
				delay = 1;
			queue_delayed_work(mad_agent_priv->qp_info->
					   port_priv->wq,
					   &mad_agent_priv->timed_work, delay);
			break;
		}

		if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
			list_item = &cancel_list;
		else if (retry_send(mad_send_wr))
			list_item = &timeout_list;
		else
			/* Retry succeeded: nothing to report for this entry */
			continue;

		change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
		list_add_tail(&mad_send_wr->agent_list, list_item);
	}

	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	/* Handlers below are called without the agent lock held */
	process_backlog_mads(mad_agent_priv);
	clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR,
			     mad_agent_priv);
	clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}

/*
 * Allocate receive MADs and post receive WRs for them
 *
 * If @mad is not NULL it is used as the first buffer instead of a fresh
 * allocation.  Buffers are posted until the receive queue holds
 * max_active entries or an error occurs; the buffer that could not be
 * posted is unmapped and freed before returning.
 *
 * Returns 0 once the queue is full, -ENOMEM on allocation or DMA mapping
 * failure, or the error returned by ib_post_recv().  Note that the
 * allocation-failure path returns directly.
 */
static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
				    struct ib_mad_private *mad)
{
	unsigned long flags;
	struct ib_mad_private *mad_priv;
	struct ib_sge sg_list;
	struct ib_recv_wr recv_wr;
	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
	int ret = 0;

	/* Initialize common scatter list fields */
	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;

	/* Initialize common receive WR fields */
	recv_wr.next = NULL;
	recv_wr.sg_list = &sg_list;
	recv_wr.num_sge = 1;

	while (true) {
		/* Allocate and map receive buffer */
		if (mad) {
			/* Reuse the caller's buffer for the first iteration */
			mad_priv = mad;
			mad = NULL;
		} else {
			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
						     GFP_ATOMIC);
			if (!mad_priv)
				return -ENOMEM;
		}
		sg_list.length =
mad_priv_dma_size(mad_priv);
		/* The mapped region starts at the GRH */
		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
						 &mad_priv->grh,
						 mad_priv_dma_size(mad_priv),
						 DMA_FROM_DEVICE);
		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
						  sg_list.addr))) {
			ret = -ENOMEM;
			/* Nothing was mapped, so skip the unmap */
			goto free_mad_priv;
		}
		mad_priv->header.mapping = sg_list.addr;
		mad_priv->header.mad_list.mad_queue = recv_queue;
		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
		spin_lock_irqsave(&recv_queue->lock, flags);
		if (recv_queue->count >= recv_queue->max_active) {
			/* Fully populated the receive queue */
			spin_unlock_irqrestore(&recv_queue->lock, flags);
			break;
		}
		/* Account for the buffer before posting it */
		recv_queue->count++;
		list_add_tail(&mad_priv->header.mad_list.list,
			      &recv_queue->list);
		spin_unlock_irqrestore(&recv_queue->lock, flags);

		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
		if (ret) {
			/* Undo the accounting done above */
			spin_lock_irqsave(&recv_queue->lock, flags);
			list_del(&mad_priv->header.mad_list.list);
			recv_queue->count--;
			spin_unlock_irqrestore(&recv_queue->lock, flags);
			dev_err(&qp_info->port_priv->device->dev,
				"ib_post_recv failed: %d\n", ret);
			break;
		}
	}

	/* Reached via break: the last buffer was mapped but not posted */
	ib_dma_unmap_single(qp_info->port_priv->device,
			    mad_priv->header.mapping,
			    mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
free_mad_priv:
	kfree(mad_priv);

	return ret;
}

/*
 * Return all the posted receive MADs
 *
 * Does nothing when the QP was never created.  Otherwise every entry on
 * the receive queue list is unlinked, DMA-unmapped and freed, and the
 * queue count is reset to 0.  No lock is taken here.
 */
static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
{
	struct ib_mad_private_header *mad_priv_hdr;
	struct ib_mad_private *recv;
	struct ib_mad_list_head *mad_list;

	if (!qp_info->qp)
		return;

	while (!list_empty(&qp_info->recv_queue.list)) {

		mad_list = list_entry(qp_info->recv_queue.list.next,
				      struct ib_mad_list_head, list);
		mad_priv_hdr = container_of(mad_list,
					    struct ib_mad_private_header,
					    mad_list);
		recv = container_of(mad_priv_hdr, struct ib_mad_private,
				    header);

		/* Remove from posted receive MAD list */
		list_del(&mad_list->list);

		ib_dma_unmap_single(qp_info->port_priv->device,
				    recv->header.mapping,
				    mad_priv_dma_size(recv),
				    DMA_FROM_DEVICE);
		kfree(recv);
	}
qp_info->recv_queue.count = 0;
}

/*
 * Start the port
 *
 * Moves every existing MAD QP of the port through INIT, RTR and RTS,
 * requests a completion notification on the port's CQ, and posts the
 * initial receive buffers on each QP.
 *
 * Returns 0 on success, -ENOMEM if the attribute buffer cannot be
 * allocated, or the first error returned by ib_modify_qp(),
 * ib_req_notify_cq() or ib_mad_post_receive_mads().
 */
static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
{
	int ret, i;
	struct ib_qp_attr *attr;
	struct ib_qp *qp;
	u16 pkey_index;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	/* Fall back to index 0 if the default full pkey is not found */
	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
			   IB_DEFAULT_PKEY_FULL, &pkey_index);
	if (ret)
		pkey_index = 0;

	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
		qp = port_priv->qp_info[i].qp;
		if (!qp)
			continue;

		/*
		 * PKey index for QP1 is irrelevant but
		 * one is needed for the Reset to Init transition
		 */
		attr->qp_state = IB_QPS_INIT;
		attr->pkey_index = pkey_index;
		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
		if (ret) {
			dev_err(&port_priv->device->dev,
				"Couldn't change QP%d state to INIT: %d\n",
				i, ret);
			goto out;
		}

		attr->qp_state = IB_QPS_RTR;
		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
		if (ret) {
			dev_err(&port_priv->device->dev,
				"Couldn't change QP%d state to RTR: %d\n",
				i, ret);
			goto out;
		}

		attr->qp_state = IB_QPS_RTS;
		attr->sq_psn = IB_MAD_SEND_Q_PSN;
		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
		if (ret) {
			dev_err(&port_priv->device->dev,
				"Couldn't change QP%d state to RTS: %d\n",
				i, ret);
			goto out;
		}
	}

	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
	if (ret) {
		dev_err(&port_priv->device->dev,
			"Failed to request completion notification: %d\n",
			ret);
		goto out;
	}

	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
		if (!port_priv->qp_info[i].qp)
			continue;

		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
		if (ret) {
			dev_err(&port_priv->device->dev,
				"Couldn't post receive WRs\n");
			goto out;
		}
	}
out:
	/* attr is only scratch space and is freed on every path */
	kfree(attr);
	return ret;
}

/*
 * QP event callback registered by create_mad_qp(); it only logs the event
 * number and the QP number.
 */
static void qp_event_handler(struct ib_event *event, void *qp_context)
{
	struct ib_mad_qp_info	*qp_info = qp_context;

	/* It's worse than that! He's dead, Jim!
*/ dev_err(&qp_info->port_priv->device->dev, "Fatal error (%d) on MAD QP (%u)\n", event->event, qp_info->qp->qp_num); } static void init_mad_queue(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *mad_queue) { mad_queue->qp_info = qp_info; mad_queue->count = 0; spin_lock_init(&mad_queue->lock); INIT_LIST_HEAD(&mad_queue->list); } static void init_mad_qp(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info) { qp_info->port_priv = port_priv; init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, enum ib_qp_type qp_type) { struct ib_qp_init_attr qp_init_attr; int ret; memset(&qp_init_attr, 0, sizeof qp_init_attr); qp_init_attr.send_cq = qp_info->port_priv->cq; qp_init_attr.recv_cq = qp_info->port_priv->cq; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; qp_init_attr.cap.max_send_wr = mad_sendq_size; qp_init_attr.cap.max_recv_wr = mad_recvq_size; qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; qp_init_attr.qp_type = qp_type; qp_init_attr.port_num = qp_info->port_priv->port_num; qp_init_attr.qp_context = qp_info; qp_init_attr.event_handler = qp_event_handler; qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); if (IS_ERR(qp_info->qp)) { dev_err(&qp_info->port_priv->device->dev, "Couldn't create ib_mad QP%d\n", get_spl_qp_index(qp_type)); ret = PTR_ERR(qp_info->qp); goto error; } /* Use minimum queue sizes unless the CQ is resized */ qp_info->send_queue.max_active = mad_sendq_size; qp_info->recv_queue.max_active = mad_recvq_size; return 0; error: return ret; } static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { if (!qp_info->qp) return; ib_destroy_qp(qp_info->qp); } /* * Open the port * Create the QP, PD, MR, and CQ if needed */ static int ib_mad_port_open(struct ib_device *device, u32 port_num) { int ret, cq_size; struct 
ib_mad_port_private *port_priv; unsigned long flags; int has_smi; if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) return -EFAULT; if (WARN_ON(rdma_cap_opa_mad(device, port_num) && rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) return -ENOMEM; port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); goto error3; } port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_UNBOUND_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); goto error4; } if (has_smi) { ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); if (ret) goto error6; } if (rdma_cap_ib_cm(device, port_num)) { ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; } port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM, port_num); if (!port_priv->wq) { ret = -ENOMEM; goto error8; } spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); ret = ib_mad_port_start(port_priv); if (ret) { dev_err(&device->dev, "Couldn't start port\n"); goto error9; } return 0; error9: spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); error8: destroy_mad_qp(&port_priv->qp_info[1]); error7: 
destroy_mad_qp(&port_priv->qp_info[0]); error6: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); error4: ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); return ret; } /* * Close the port * If there are no classes using the port, free the port * resources (CQ, MR, PD, QP) and remove the port's info structure */ static int ib_mad_port_close(struct ib_device *device, u32 port_num) { struct ib_mad_port_private *port_priv; unsigned long flags; spin_lock_irqsave(&ib_mad_port_list_lock, flags); port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); dev_err(&device->dev, "Port %u not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); ib_free_cq(port_priv->cq); ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ kfree(port_priv); return 0; } static int ib_mad_init_device(struct ib_device *device) { int start, i; unsigned int count = 0; int ret; start = rdma_start_port(device); for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; ret = ib_mad_port_open(device, i); if (ret) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } ret = ib_agent_port_open(device, i); if (ret) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } count++; } if (!count) return -EOPNOTSUPP; return 0; error_agent: if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); error: while (--i >= start) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for 
agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } return ret; } static void ib_mad_remove_device(struct ib_device *device, void *client_data) { unsigned int i; rdma_for_each_port (device, i) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %u for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %u\n", i); } } static struct ib_client mad_client = { .name = "mad", .add = ib_mad_init_device, .remove = ib_mad_remove_device }; int ib_mad_init(void) { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; } return 0; } void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); }
51 5032 5048 203 204 203 204 102 5422 5028 68 1882 298 5510 5513 298 3322 4385 1880 2407 3758 142 4 4 2493 2431 2480 5006 4985 2661 2501 4927 3437 2420 2407 2408 2410 2420 2405 2407 2410 2413 4308 102 3366 184 181 188 187 188 187 184 188 5022 5019 207 128 140 203 192 143 2 2 1 1 2 2 133 3430 5019 5027 5017 3455 130 133 4287 41 2 1 2 1 1 1 2 2 68 69 41 27 2 70 4 69 70 70 2104 1692 2014 70 1 69 4 69 68 69 1 1 2099 2121 5022 5028 5023 5062 3471 5015 5025 5018 5014 200 4106 3471 5057 4956 3252 65 5027 5059 5013 133 2936 4979 5019 5697 4722 4719 1566 4709 4760 7 7 7 7 7 7 1701 1698 1705 1703 1697 561 1328 1334 1463 1463 246 1438 1479 381 382 388 39 38 105 313 248 67 67 68 68 228 162 234 230 162 108 233 78 232 234 7 233 231 7 232 429 432 430 306 307 232 230 234 7 227 7 7 7 7 7 7 146 145 146 145 2193 1724 226 725 163 309 310 307 307 724 2188 2209 259 52 656 18 179 179 160 26 18 18 18 1695 1689 1868 1868 413 1695 1876 1886 1690 3 1695 1693 1690 1337 412 580 15 16 1688 1688 1689 1691 1700 10 11 11 184 185 186 185 185 103 104 104 104 104 189 186 188 186 186 186 185 191 189 37 92 190 188 185 187 185 187 188 183 188 188 184 187 186 185 187 93 103 187 186 187 187 185 188 191 38 38 92 104 191 190 191 120 105 9 102 188 188 189 189 190 191 191 38 186 38 185 189 1 191 38 188 192 189 189 191 191 189 143 142 142 68 104 143 143 141 142 143 143 10 6 4 9 5 4 4 1 1 48 49 49 95 47 5417 5428 5418 102 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 
1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 
1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 
2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 
2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 
3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 
3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 
3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 
4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 
4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 
5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 
5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 
5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 
6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 
6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 
7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 
7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 
7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 // SPDX-License-Identifier: GPL-2.0-only /* * kernel/workqueue.c - generic async execution with shared worker pool * * Copyright (C) 2002 Ingo Molnar * * Derived from the taskqueue/keventd code by: * David Woodhouse <dwmw2@infradead.org> * Andrew Morton * Kai Petzke <wpp@marie.physik.tu-berlin.de> * Theodore Ts'o <tytso@mit.edu> * * Made to use alloc_percpu by Christoph Lameter. * * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo <tj@kernel.org> * * This is the generic async execution mechanism. Work items are * executed in process context. The worker pool is shared and * automatically managed. There are two worker pools for each CPU (one for * normal work items and the other for high priority ones) and some extra * pools for workqueues which are not bound to any specific CPU - the * number of these backing pools is dynamic. * * Please read Documentation/core-api/workqueue.rst for details. 
*/

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>
#include <linux/delay.h>
#include <linux/irq_work.h>

#include "workqueue_internal.h"

/* State bits of a worker_pool; every constant is a distinct bit. */
enum worker_pool_flags {
	/*
	 * worker_pool flags
	 *
	 * A bound pool is either associated or disassociated with its CPU.
	 * While associated (!DISASSOCIATED), all workers are bound to the
	 * CPU and none has %WORKER_UNBOUND set and concurrency management
	 * is in effect.
	 *
	 * While DISASSOCIATED, the cpu may be offline and all workers have
	 * %WORKER_UNBOUND set and concurrency management disabled, and may
	 * be executing on any CPU.  The pool behaves as an unbound one.
	 *
	 * Note that DISASSOCIATED should be flipped only while holding
	 * wq_pool_attach_mutex to avoid changing binding state while
	 * worker_attach_to_pool() is in progress.
	 *
	 * As there can only be one concurrent BH execution context per CPU, a
	 * BH pool is per-CPU and always DISASSOCIATED.
	 */
	POOL_BH			= 1 << 0,	/* is a BH pool */
	POOL_MANAGER_ACTIVE	= 1 << 1,	/* being managed */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
	POOL_BH_DRAINING	= 1 << 3,	/* draining after CPU offline */
};

/* State bits of an individual worker; the bit positions are not contiguous. */
enum worker_flags {
	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	/* mask combining PREP, CPU_INTENSIVE, UNBOUND and REBOUND */
	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,
};

/* Modifier bits describing what kind of cancel operation is being done. */
enum work_cancel_flags {
	WORK_CANCEL_DELAYED	= 1 << 0,	/* canceling a delayed_work */
	WORK_CANCEL_DISABLE	= 1 << 1,	/* canceling to disable */
};

/* Compile-time tunables and sizes used throughout this file. */
enum wq_internal_consts {
	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give MIN_NICE.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 32,
	WORKER_ID_LEN		= 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
};

/*
 * We don't want to trap softirq for too long. See MAX_SOFTIRQ_TIME and
 * MAX_SOFTIRQ_RESTART in kernel/softirq.c. These are macros because
 * msecs_to_jiffies() can't be an initializer.
 */
#define BH_WORKER_JIFFIES	msecs_to_jiffies(2)
#define BH_WORKER_RESTARTS	10

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Modifiable by initialization/destruction paths and read-only for
 *    everyone else.
 *
 * P: Preemption protected.
Disabling preemption is enough and should
 *    only be modified and accessed from the local cpu.
 *
 * L: pool->lock protected.  Access with pool->lock held.
 *
 * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
 *     reads.
 *
 * K: Only modified by worker while holding pool->lock. Can be safely read by
 *    self, while holding pool->lock or from IRQ context if %current is the
 *    kworker.
 *
 * S: Only modified by worker self.
 *
 * A: wq_pool_attach_mutex protected.
 *
 * PL: wq_pool_mutex protected.
 *
 * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
 *
 * PW: wq_pool_mutex and wq->mutex protected for writes.  Either for reads.
 *
 * PWR: wq_pool_mutex and wq->mutex protected for writes.  Either or
 *      RCU for reads.
 *
 * WQ: wq->mutex protected.
 *
 * WR: wq->mutex protected for writes.  RCU protected for reads.
 *
 * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
 *     with READ_ONCE() without locking.
 *
 * MD: wq_mayday_lock protected.
 *
 * WD: Used internally by the watchdog.
 */

/* struct worker is defined in workqueue_internal.h */

/*
 * One pool of workers: its pending work list, its idle, busy and attached
 * workers, and the timers associated with them.  The letter prefix in each
 * field comment names the exclusion rule that protects that field.
 */
struct worker_pool {
	raw_spinlock_t		lock;		/* the pool lock */
	int			cpu;		/* I: the associated cpu */
	int			node;		/* I: the associated node ID */
	int			id;		/* I: pool ID */
	unsigned int		flags;		/* L: flags */

	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
	bool			cpu_stall;	/* WD: stalled cpu bound pool */

	/*
	 * The counter is incremented in a process context on the associated CPU
	 * w/ preemption disabled, and decremented or reset in the same context
	 * but w/ pool->lock held. The readers grab pool->lock and are
	 * guaranteed to see if the counter reached zero.
	 */
	int			nr_running;

	struct list_head	worklist;	/* L: list of pending works */

	int			nr_workers;	/* L: total number of workers */
	int			nr_idle;	/* L: currently idle workers */

	struct list_head	idle_list;	/* L: list of idle workers */
	struct timer_list	idle_timer;	/* L: worker idle timeout */
	struct work_struct      idle_cull_work; /* L: worker idle cleanup */

	struct timer_list	mayday_timer;	  /* L: SOS timer for workers */

	/* a worker is either on busy_hash or idle_list, or is the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
						/* L: hash of busy workers */

	struct worker		*manager;	/* L: purely informational */
	struct list_head	workers;	/* A: attached workers */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* I: worker attributes */
	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
	int			refcnt;		/* PL: refcnt for unbound pools */

	/*
	 * Destruction of pool is RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
};

/*
 * Per-pool_workqueue statistics.  These can be monitored using
 * tools/workqueue/wq_monitor.py.
 */
enum pool_workqueue_stats {
	PWQ_STAT_STARTED,	/* work items started execution */
	PWQ_STAT_COMPLETED,	/* work items completed execution */
	PWQ_STAT_CPU_TIME,	/* total CPU time consumed */
	PWQ_STAT_CPU_INTENSIVE,	/* wq_cpu_intensive_thresh_us violations */
	PWQ_STAT_CM_WAKEUP,	/* concurrency-management worker wakeups */
	PWQ_STAT_REPATRIATED,	/* unbound workers brought back into scope */
	PWQ_STAT_MAYDAY,	/* maydays to rescuer */
	PWQ_STAT_RESCUED,	/* linked work items executed by rescuer */

	PWQ_NR_STATS,		/* number of counters above; keep last */
};

/*
 * The per-pool workqueue.  While queued, bits below WORK_PWQ_SHIFT
 * of work_struct->data are used for flags and the remaining high bits
 * point to the pwq; thus, pwqs need to be aligned at two's power of the
 * number of flag bits.
*/
struct pool_workqueue {
	struct worker_pool	*pool;		/* I: the associated pool */
	struct workqueue_struct *wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			refcnt;		/* L: reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	bool			plugged;	/* L: execution suspended */

	/*
	 * nr_active management and WORK_STRUCT_INACTIVE:
	 *
	 * When pwq->nr_active >= max_active, new work item is queued to
	 * pwq->inactive_works instead of pool->worklist and marked with
	 * WORK_STRUCT_INACTIVE.
	 *
	 * All work items marked with WORK_STRUCT_INACTIVE do not participate in
	 * nr_active and all work items in pwq->inactive_works are marked with
	 * WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE work items are
	 * in pwq->inactive_works. Some of them are ready to run in
	 * pool->worklist or worker->scheduled. Those work items are only struct
	 * wq_barrier instances, which are used for flush_work() and should not
	 * participate in nr_active. A non-barrier work item is marked with
	 * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
	 */
	int			nr_active;	/* L: nr of active works */
	struct list_head	inactive_works;	/* L: inactive works */
	struct list_head	pending_node;	/* LN: node on wq_node_nr_active->pending_pwqs */
	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
	struct list_head	mayday_node;	/* MD: node on wq->maydays */

	/* one counter per enum pool_workqueue_stats value */
	u64			stats[PWQ_NR_STATS];

	/*
	 * Release of unbound pwq is punted to a kthread_worker. See put_pwq()
	 * and pwq_release_workfn() for details. pool_workqueue itself is also
	 * RCU protected so that the first pwq can be determined without
	 * grabbing wq->mutex.
	 */
	struct kthread_work	release_work;
	struct rcu_head		rcu;
	/*
	 * The alignment below keeps the low WORK_STRUCT_PWQ_SHIFT bits of a
	 * pwq address zero.
	 */
} __aligned(1 << WORK_STRUCT_PWQ_SHIFT);

/*
 * Structure used to wait for workqueue flush.
*/
struct wq_flusher {
	struct list_head	list;		/* WQ: list of flushers */
	int			flush_color;	/* WQ: flush color waiting for */
	struct completion	done;		/* flush completion */
};

/* Forward declaration only; the definition is not in this part of the file. */
struct wq_device;

/*
 * Unlike in a per-cpu workqueue where max_active limits its concurrency level
 * on each CPU, in an unbound workqueue, max_active applies to the whole system.
 * As sharing a single nr_active across multiple sockets can be very expensive,
 * the counting and enforcement is per NUMA node.
 *
 * The following struct is used to enforce per-node max_active. When a pwq wants
 * to start executing a work item, it should increment ->nr using
 * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
 * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
 * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
 * round-robin order.
 */
struct wq_node_nr_active {
	int			max;		/* per-node max_active */
	atomic_t		nr;		/* per-node nr_active */
	raw_spinlock_t		lock;		/* nests inside pool locks */
	struct list_head	pending_pwqs;	/* LN: pwqs with inactive works */
};

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
*/
struct workqueue_struct {
	struct list_head	pwqs;		/* WR: all pwqs of this wq */
	struct list_head	list;		/* PR: list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* WQ: current work color */
	int			flush_color;	/* WQ: current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* WQ: first flusher */
	struct list_head	flusher_queue;	/* WQ: flush waiters */
	struct list_head	flusher_overflow; /* WQ: flush overflow list */

	struct list_head	maydays;	/* MD: pwqs requesting rescue */
	struct worker		*rescuer;	/* MD: rescue worker */

	int			nr_drainers;	/* WQ: drain in progress */

	/* See alloc_workqueue() function comment for info on min/max_active */
	int			max_active;	/* WO: max active works */
	int			min_active;	/* WO: min active works */
	int			saved_max_active; /* WQ: saved max_active */
	int			saved_min_active; /* WQ: saved min_active */

	struct workqueue_attrs	*unbound_attrs;	/* PW: only for unbound wqs */
	struct pool_workqueue __rcu *dfl_pwq;   /* PW: only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	/* lockdep bookkeeping; only present when CONFIG_LOCKDEP is enabled */
	char			*lock_name;
	struct lock_class_key	key;
	struct lockdep_map	__lockdep_map;
	struct lockdep_map	*lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* I: workqueue name */

	/*
	 * Destruction of workqueue_struct is RCU protected to allow walking
	 * the workqueues list without grabbing wq_pool_mutex.
	 * This is used to dump all workqueues from sysrq.
	 */
	struct rcu_head		rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
	struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
	/* flexible array member, so it has to remain the last field */
	struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};

/*
 * Each pod type describes how CPUs should be grouped for unbound workqueues.
 * See the comment above workqueue_attrs->affn_scope.
*/
struct wq_pod_type {
	int			nr_pods;	/* number of pods */
	cpumask_var_t		*pod_cpus;	/* pod -> cpus */
	int			*pod_node;	/* pod -> node */
	int			*cpu_pod;	/* cpu -> pod */
};

/*
 * Three 32-bit values: a pool ID, a disable value and flags.
 * NOTE(review): presumably the unpacked form of an off-queue work->data
 * word - confirm against the code that fills this in.
 */
struct work_offq_data {
	u32			pool_id;
	u32			disable;
	u32			flags;
};

/* Printable name for each WQ_AFFN_* scope, indexed by the scope value. */
static const char *wq_affn_names[WQ_AFFN_NR_TYPES] = {
	[WQ_AFFN_DFL]		= "default",
	[WQ_AFFN_CPU]		= "cpu",
	[WQ_AFFN_SMT]		= "smt",
	[WQ_AFFN_CACHE]		= "cache",
	[WQ_AFFN_NUMA]		= "numa",
	[WQ_AFFN_SYSTEM]	= "system",
};

/*
 * Per-cpu work items which run for longer than the following threshold are
 * automatically considered CPU intensive and excluded from concurrency
 * management to prevent them from noticeably delaying other per-cpu work items.
 * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
 * The actual value is initialized in wq_cpu_intensive_thresh_init().
 */
static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
/* exposed as the cpu_intensive_warning_thresh module parameter; defaults to 4 */
static unsigned int wq_cpu_intensive_warning_thresh = 4;
module_param_named(cpu_intensive_warning_thresh, wq_cpu_intensive_warning_thresh, uint, 0644);
#endif

/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;			/* can kworkers be created yet?
*/
static bool wq_topo_initialized __read_mostly = false;

static struct kmem_cache *pwq_cache;

/* one pod description per affinity scope */
static struct wq_pod_type wq_pod_types[WQ_AFFN_NR_TYPES];
static enum wq_affn_scope wq_affn_dfl = WQ_AFFN_CACHE;

/*
 * buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion
 * NOTE(review): the variable is named after "update_pwq_attrs" while this
 * comment names wq_update_unbound_pod_attrs() - confirm the function name is
 * still current.
 */
static struct workqueue_attrs *unbound_wq_update_pwq_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
static DEFINE_RAW_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
/* wait for manager to go away */
static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
static bool workqueue_freezing;		/* PL: have wqs started freezing? */

/* PL: mirror the cpu_online_mask excluding the CPU in the midst of hotplugging */
static cpumask_var_t wq_online_cpumask;

/* PL&A: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* PL: user requested unbound cpumask via sysfs */
static cpumask_var_t wq_requested_unbound_cpumask;

/* PL: isolated cpumask to be excluded from unbound cpumask */
static cpumask_var_t wq_isolated_cpumask;

/* to further constrain wq_unbound_cpumask by a cmdline parameter */
static struct cpumask wq_cmdline_cpumask __initdata;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
*/
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
/* the default above can be changed through the debug_force_rr_cpu parameter */
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* to raise softirq for the BH worker pools on other CPUs */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
				     bh_pool_irq_works);

/* the BH worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
				     bh_worker_pools);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
				     cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */

/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

/*
 * I: kthread_worker to release pwq's. pwq release needs to be bounced to a
 * process context while holding a pool lock. Bounce to a dedicated kthread
 * worker to avoid A-A deadlocks.
*/
static struct kthread_worker *pwq_release_worker __ro_after_init;

/*
 * Workqueue pointers exported for use outside this file.
 * NOTE(review): system_bh_wq and system_bh_highpri_wq do not carry the
 * __ro_after_init annotation that the other pointers have - confirm whether
 * that is intentional.
 */
struct workqueue_struct *system_wq __ro_after_init;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_percpu_wq __ro_after_init;
EXPORT_SYMBOL(system_percpu_wq);
struct workqueue_struct *system_highpri_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_dfl_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_dfl_wq);
struct workqueue_struct *system_freezable_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
struct workqueue_struct *system_bh_wq;
EXPORT_SYMBOL_GPL(system_bh_wq);
struct workqueue_struct *system_bh_highpri_wq;
EXPORT_SYMBOL_GPL(system_bh_highpri_wq);

/* forward declarations of functions defined later in this file */
static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
static void show_pwq(struct pool_workqueue *pwq);
static void show_one_worker_pool(struct worker_pool *pool);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

/* Lockdep check: warn unless an RCU read lock or wq_pool_mutex is held. */
#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU or wq_pool_mutex should be held")

/*
 * Lockdep check: warn unless an RCU read lock, wq->mutex or wq_pool_mutex is
 * held.
 * NOTE(review): @wq is expanded unparenthesized in "&wq->mutex", so callers
 * should pass a plain identifier.
 */
#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU, wq->mutex or wq_pool_mutex should be held")

/* Walk the NR_STD_WORKER_POOLS entries of bh_worker_pools belonging to @cpu. */
#define for_each_bh_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(bh_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(bh_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/* Walk the NR_STD_WORKER_POOLS entries of cpu_worker_pools belonging to @cpu. */
#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or RCU read
 * locked.  If the pool needs to be used beyond the locking in effect, the
 * caller is responsible for guaranteeing that the pool stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with wq_pool_attach_mutex.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or RCU read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
*/
#define for_each_pwq(pwq, wq) \
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
				 lockdep_is_held(&(wq->mutex)))

#ifdef CONFIG_DEBUG_OBJECTS_WORK

/* Tentative definition; the initialized descriptor follows the callbacks. */
static const struct debug_obj_descr work_debug_descr;

/* debug_hint callback: report the work item's function pointer. */
static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

/* is_static_object callback: test WORK_STRUCT_STATIC_BIT in the work data. */
static bool work_is_static_object(void *addr)
{
	struct work_struct *work = addr;

	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 *
 * The active work is cancelled synchronously and then re-initialized.
 * Returns true when a fixup was performed, false for any other state.
 */
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 *
 * The active work is cancelled synchronously and then released from
 * tracking.  Returns true when a fixup was performed, false otherwise.
 */
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.debug_hint	= work_debug_hint,
	.is_static_object = work_is_static_object,
	.fixup_init	= work_fixup_init,
	.fixup_free	= work_fixup_free,
};

/* Wrapper around debug_object_activate() using work_debug_descr. */
static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

/* Wrapper around debug_object_deactivate() using work_debug_descr. */
static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

/*
 * Start debugobjects tracking of @work, using the on-stack variant when
 * @onstack is non-zero.
 */
void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

/* Drop debugobjects tracking of @work. */
void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

/*
 * Destroy the timer embedded in @work, then drop debugobjects tracking of
 * the embedded work item.
 */
void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	timer_destroy_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
/* No-op stubs when CONFIG_DEBUG_OBJECTS_WORK is not set. */
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/**
 * worker_pool_assign_id - allocate ID and assign it to @pool
 * @pool: the pool pointer of interest
 *
 * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
 * successfully, -errno on failure.
 */
static int worker_pool_assign_id(struct worker_pool *pool)
{
	int ret;

	lockdep_assert_held(&wq_pool_mutex);

	ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
			GFP_KERNEL);
	if (ret >= 0) {
		pool->id = ret;
		return 0;
	}
	return ret;
}

/*
 * Return the address of the pwq pointer slot to use: @wq's per-CPU slot for
 * @cpu when @cpu >= 0, the dfl_pwq slot otherwise.
 */
static struct pool_workqueue __rcu **
unbound_pwq_slot(struct workqueue_struct *wq, int cpu)
{
	if (cpu >= 0)
		return per_cpu_ptr(wq->cpu_pwq, cpu);
	else
		return &wq->dfl_pwq;
}

/* @cpu < 0 for dfl_pwq */
static struct pool_workqueue *unbound_pwq(struct workqueue_struct *wq, int cpu)
{
	return rcu_dereference_check(*unbound_pwq_slot(wq, cpu),
				     lockdep_is_held(&wq_pool_mutex) ||
				     lockdep_is_held(&wq->mutex));
}

/**
 * unbound_effective_cpumask - effective cpumask of an unbound workqueue
 * @wq: workqueue of interest
 *
 * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
 * is masked with wq_unbound_cpumask to determine the effective cpumask. The
 * default pwq is always mapped to the pool with the current effective cpumask.
*/ static struct cpumask *unbound_effective_cpumask(struct workqueue_struct *wq) { return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; } static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; } static int get_work_color(unsigned long work_data) { return (work_data >> WORK_STRUCT_COLOR_SHIFT) & ((1 << WORK_STRUCT_COLOR_BITS) - 1); } static int work_next_color(int color) { return (color + 1) % WORK_NR_COLORS; } static unsigned long pool_offq_flags(struct worker_pool *pool) { return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : 0; } /* * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data * contain the pointer to the queued pwq. Once execution starts, the flag * is cleared and the high bits contain OFFQ flags and pool ID. * * set_work_pwq(), set_work_pool_and_clear_pending() and mark_work_canceling() * can be used to set the pwq, pool or clear work->data. These functions should * only be called while the work is owned - ie. while the PENDING bit is set. * * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq * corresponding to a work. Pool is available once the work has been * queued anywhere after initialization until it is sync canceled. pwq is * available only while the work item is queued. 
*/
/*
 * Store @data into work->data, OR-ing in work_static(work).  Warns once if
 * @work is not pending.
 */
static inline void set_work_data(struct work_struct *work, unsigned long data)
{
	WARN_ON_ONCE(!work_pending(work));
	atomic_long_set(&work->data, data | work_static(work));
}

/* Record @pwq in @work's data along with PENDING, PWQ and the extra @flags. */
static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long flags)
{
	set_work_data(work, (unsigned long)pwq | WORK_STRUCT_PENDING |
		      WORK_STRUCT_PWQ | flags);
}

/* Record @pool_id and @flags in @work's data while keeping PENDING set. */
static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id, unsigned long flags)
{
	set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) |
		      WORK_STRUCT_PENDING | flags);
}

/*
 * Record @pool_id and @flags in @work's data without WORK_STRUCT_PENDING,
 * bracketed by the barriers explained in the body.
 */
static void set_work_pool_and_clear_pending(struct work_struct *work,
					    int pool_id, unsigned long flags)
{
	/*
	 * The following wmb is paired with the implied mb in
	 * test_and_set_bit(PENDING) and ensures all updates to @work made
	 * here are visible to and precede any updates by the next PENDING
	 * owner.
	 */
	smp_wmb();
	set_work_data(work, ((unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT) |
		      flags);
	/*
	 * The following mb guarantees that previous clear of a PENDING bit
	 * will not be reordered with any speculative LOADS or STORES from
	 * work->current_func, which is executed afterwards.  This possible
	 * reordering can lead to a missed execution on attempt to queue
	 * the same @work.  E.g. consider this case:
	 *
	 *   CPU#0                         CPU#1
	 *   ----------------------------  --------------------------------
	 *
	 * 1  STORE event_indicated
	 * 2  queue_work_on() {
	 * 3    test_and_set_bit(PENDING)
	 * 4  }                            set_..._and_clear_pending() {
	 * 5                                 set_work_data() # clear bit
	 * 6                                 smp_mb()
	 * 7                               work->current_func() {
	 * 8                                 LOAD event_indicated
	 *                                 }
	 *
	 * Without an explicit full barrier speculative LOAD on line 8 can
	 * be executed before CPU#0 does STORE on line 1.  If that happens,
	 * CPU#0 observes the PENDING bit is still set and new execution of
	 * a @work is not queued in a hope, that CPU#1 will eventually
	 * finish the queued @work.  Meanwhile CPU#1 does not see
	 * event_indicated is set, because speculative LOAD was executed
	 * before actual STORE.
	 */
	smp_mb();
}

/* Extract the pwq pointer from @data by masking with WORK_STRUCT_PWQ_MASK. */
static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
{
	return (struct pool_workqueue *)(data & WORK_STRUCT_PWQ_MASK);
}

/*
 * Return the pwq stored in @work's data if WORK_STRUCT_PWQ is set, %NULL
 * otherwise.
 */
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return work_struct_pwq(data);
	else
		return NULL;
}

/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and allows read
 * access under RCU read lock.  As such, this function should be
 * called under wq_pool_mutex or inside of a rcu_read_lock() region.
 *
 * All fields of the returned pool are accessible as long as the above
 * mentioned locking is in effect.  If the returned pool needs to be used
 * beyond the critical section, the caller is responsible for ensuring the
 * returned pool is and stays online.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
*/ static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); int pool_id; assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data)->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; return idr_find(&worker_pool_idr, pool_id); } static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) { WARN_ON_ONCE(data & WORK_STRUCT_PWQ); offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS); offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT, WORK_OFFQ_DISABLE_BITS); offqd->flags = data & WORK_OFFQ_FLAG_MASK; } static unsigned long work_offqd_pack_flags(struct work_offq_data *offqd) { return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) | ((unsigned long)offqd->flags); } /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that * they're being called with pool->lock held. */ /* * Need to wake up a worker? Called from anything but currently * running workers. * * Note that, because unbound workers never contribute to nr_running, this * function will always return %true for unbound pools as long as the * worklist isn't empty. */ static bool need_more_worker(struct worker_pool *pool) { return !list_empty(&pool->worklist) && !pool->nr_running; } /* Can I start working? Called from busy but !running workers. */ static bool may_start_working(struct worker_pool *pool) { return pool->nr_idle; } /* Do I need to keep working? Called from currently running workers. */ static bool keep_working(struct worker_pool *pool) { return !list_empty(&pool->worklist) && (pool->nr_running <= 1); } /* Do we need a new worker? Called from manager. 
*/ static bool need_to_create_worker(struct worker_pool *pool) { return need_more_worker(pool) && !may_start_working(pool); } /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { bool managing = pool->flags & POOL_MANAGER_ACTIVE; int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; } /** * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to set * * Set @flags in @worker->flags and adjust nr_running accordingly. */ static inline void worker_set_flags(struct worker *worker, unsigned int flags) { struct worker_pool *pool = worker->pool; lockdep_assert_held(&pool->lock); /* If transitioning into NOT_RUNNING, adjust nr_running. */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { pool->nr_running--; } worker->flags |= flags; } /** * worker_clr_flags - clear worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to clear * * Clear @flags in @worker->flags and adjust nr_running accordingly. */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { struct worker_pool *pool = worker->pool; unsigned int oflags = worker->flags; lockdep_assert_held(&pool->lock); worker->flags &= ~flags; /* * If transitioning out of NOT_RUNNING, increment nr_running. Note * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask * of multiple flags, not a single flag. */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) pool->nr_running++; } /* Return the first idle worker. Called with pool->lock held. 
*/
static struct worker *first_idle_worker(struct worker_pool *pool)
{
	/* an empty idle_list means there is no idle worker to return */
	if (unlikely(list_empty(&pool->idle_list)))
		return NULL;

	return list_first_entry(&pool->idle_list, struct worker, entry);
}

/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_enter_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	/* bail out (with a one-time warning) on inconsistent worker state */
	if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
	    WARN_ON_ONCE(!list_empty(&worker->entry) &&
			 (worker->hentry.next || worker->hentry.pprev)))
		return;

	/* can't use worker_set_flags(), also called from create_worker() */
	worker->flags |= WORKER_IDLE;
	pool->nr_idle++;
	worker->last_active = jiffies;

	/* idle_list is LIFO */
	list_add(&worker->entry, &pool->idle_list);

	/* arm the idle timer only when it is needed and not already pending */
	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);

	/* Sanity check nr_running. */
	WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running);
}

/**
 * worker_leave_idle - leave idle state
 * @worker: worker which is leaving idle state
 *
 * @worker is leaving idle state.  Update stats.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_leave_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	/* nothing to undo if @worker isn't marked idle; warn once */
	if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
		return;
	worker_clr_flags(worker, WORKER_IDLE);
	pool->nr_idle--;
	list_del_init(&worker->entry);
}

/**
 * find_worker_executing_work - find worker which is executing a work
 * @pool: pool of interest
 * @work: work to find worker for
 *
 * Find a worker which is executing @work on @pool by searching
 * @pool->busy_hash which is keyed by the address of @work.  For a worker
 * to match, its current execution should match the address of @work and
 * its work function.
This is to avoid unwanted dependency between
 * unrelated work executions through a work item being recycled while still
 * being executed.
 *
 * This is a bit tricky.  A work item may be freed once its execution
 * starts and nothing prevents the freed area from being recycled for
 * another work item.  If the same work item address ends up being reused
 * before the original execution finishes, workqueue will identify the
 * recycled work item as currently executing and make it wait until the
 * current execution finishes, introducing an unwanted dependency.
 *
 * This function checks the work item address and work function to avoid
 * false positives.  Note that this isn't complete as one may construct a
 * work function which can introduce dependency onto itself through a
 * recycled work item.  Well, if somebody wants to shoot oneself in the
 * foot that badly, there's only so much we can do, and if such deadlock
 * actually occurs, it should be easy to locate the culprit work function.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 *
 * Return:
 * Pointer to worker which is executing @work if found, %NULL
 * otherwise.
 */
static struct worker *find_worker_executing_work(struct worker_pool *pool,
						 struct work_struct *work)
{
	struct worker *worker;

	/* both the work address and the work function have to match */
	hash_for_each_possible(pool->busy_hash, worker, hentry,
			       (unsigned long)work)
		if (worker->current_work == work &&
		    worker->current_func == work->func)
			return worker;

	return NULL;
}

/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out parameter for nested worklist walking
 *
 * Schedule linked works starting from @work to @head. Work series to be
 * scheduled starts at @work and includes any consecutive work with
 * WORK_STRUCT_LINKED set in its predecessor. See assign_work() for details on
 * @nextp.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
*/
static void move_linked_works(struct work_struct *work, struct list_head *head,
			      struct work_struct **nextp)
{
	struct work_struct *n;

	/*
	 * Linked worklist will always end before the end of the list,
	 * use NULL for list head.
	 */
	list_for_each_entry_safe_from(work, n, NULL, entry) {
		list_move_tail(&work->entry, head);
		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
			break;
	}

	/*
	 * If we're already inside safe list traversal and have moved
	 * multiple works to the scheduled queue, the next position
	 * needs to be updated.
	 */
	if (nextp)
		*nextp = n;
}

/**
 * assign_work - assign a work item and its linked work items to a worker
 * @work: work to assign
 * @worker: worker to assign to
 * @nextp: out parameter for nested worklist walking
 *
 * Assign @work and its linked work items to @worker. If @work is already being
 * executed by another worker in the same pool, it'll be punted there.
 *
 * If @nextp is not NULL, it's updated to point to the next work of the last
 * scheduled work. This allows assign_work() to be nested inside
 * list_for_each_entry_safe().
 *
 * Returns %true if @work was successfully assigned to @worker. %false if @work
 * was punted to another worker already executing it.
 */
static bool assign_work(struct work_struct *work, struct worker *worker,
			struct work_struct **nextp)
{
	struct worker_pool *pool = worker->pool;
	struct worker *collision;

	lockdep_assert_held(&pool->lock);

	/*
	 * A single work shouldn't be executed concurrently by multiple workers.
	 * __queue_work() ensures that @work doesn't jump to a different pool
	 * while still running in the previous pool. Here, we should ensure that
	 * @work is not executed concurrently by multiple workers from the same
	 * pool. Check whether anyone is already processing the work. If so,
	 * defer the work to the currently executing one.
	 */
	collision = find_worker_executing_work(pool, work);
	if (unlikely(collision)) {
		move_linked_works(work, &collision->scheduled, nextp);
		return false;
	}

	move_linked_works(work, &worker->scheduled, nextp);
	return true;
}

/*
 * Return the irq_work of @pool's CPU: slot 1 of bh_pool_irq_works when the
 * pool's nice level is HIGHPRI_NICE_LEVEL, slot 0 otherwise.
 */
static struct irq_work *bh_pool_irq_work(struct worker_pool *pool)
{
	int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0;

	return &per_cpu(bh_pool_irq_works, pool->cpu)[high];
}

/*
 * Raise the softirq serving @pool: HI_SOFTIRQ for HIGHPRI_NICE_LEVEL pools,
 * TASKLET_SOFTIRQ otherwise.  On SMP, when @pool belongs to another CPU and
 * is not marked POOL_BH_DRAINING, the pool's irq_work is queued on that CPU
 * instead.
 */
static void kick_bh_pool(struct worker_pool *pool)
{
#ifdef CONFIG_SMP
	/* see drain_dead_softirq_workfn() for BH_DRAINING */
	if (unlikely(pool->cpu != smp_processor_id() &&
		     !(pool->flags & POOL_BH_DRAINING))) {
		irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu);
		return;
	}
#endif
	if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
		raise_softirq_irqoff(HI_SOFTIRQ);
	else
		raise_softirq_irqoff(TASKLET_SOFTIRQ);
}

/**
 * kick_pool - wake up an idle worker if necessary
 * @pool: pool to kick
 *
 * @pool may have pending work items. Wake up worker if necessary. Returns
 * whether a worker was woken up.
 */
static bool kick_pool(struct worker_pool *pool)
{
	struct worker *worker = first_idle_worker(pool);
	struct task_struct *p;

	lockdep_assert_held(&pool->lock);

	if (!need_more_worker(pool) || !worker)
		return false;

	/* BH pools are kicked through kick_bh_pool() instead of a task wakeup */
	if (pool->flags & POOL_BH) {
		kick_bh_pool(pool);
		return true;
	}

	p = worker->task;

#ifdef CONFIG_SMP
	/*
	 * Idle @worker is about to execute @work and waking up provides an
	 * opportunity to migrate @worker at a lower cost by setting the task's
	 * wake_cpu field. Let's see if we want to move @worker to improve
	 * execution locality.
	 *
	 * We're waking the worker that went idle the latest and there's some
	 * chance that @worker is marked idle but hasn't gone off CPU yet. If
	 * so, setting the wake_cpu won't do anything. As this is a best-effort
	 * optimization and the race window is narrow, let's leave as-is for
	 * now. If this becomes pronounced, we can skip over workers which are
	 * still on cpu when picking an idle worker.
	 *
	 * If @pool has non-strict affinity, @worker might have ended up outside
	 * its affinity scope. Repatriate.
	 */
	if (!pool->attrs->affn_strict &&
	    !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) {
		struct work_struct *work = list_first_entry(&pool->worklist,
						struct work_struct, entry);
		int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask,
							  cpu_online_mask);
		if (wake_cpu < nr_cpu_ids) {
			p->wake_cpu = wake_cpu;
			get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++;
		}
	}
#endif
	wake_up_process(p);
	return true;
}

#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT

/*
 * Concurrency-managed per-cpu work items that hog CPU for longer than
 * wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
 * which prevents them from stalling other concurrency-managed work items. If a
 * work function keeps triggering this mechanism, it's likely that the work item
 * should be using an unbound workqueue instead.
 *
 * wq_cpu_intensive_report() tracks work functions which trigger such conditions
 * and report them so that they can be examined and converted to use unbound
 * workqueues as appropriate. To avoid flooding the console, each violating work
 * function is tracked and reported with exponential backoff.
 */
#define WCI_MAX_ENTS 128

/* One tracked work function: its address, hit count and hash linkage. */
struct wci_ent {
	work_func_t		func;
	atomic64_t		cnt;
	struct hlist_node	hash_node;
};

static struct wci_ent wci_ents[WCI_MAX_ENTS];
static int wci_nr_ents;
static DEFINE_RAW_SPINLOCK(wci_lock);
static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));

/* Look up the entry tracking @func in wci_hash.  Returns %NULL if none. */
static struct wci_ent *wci_find_ent(work_func_t func)
{
	struct wci_ent *ent;

	hash_for_each_possible_rcu(wci_hash, ent, hash_node,
				   (unsigned long)func) {
		if (ent->func == func)
			return ent;
	}
	return NULL;
}

/*
 * Count one CPU_INTENSIVE violation of @func and warn about it with
 * exponential backoff.  An entry is allocated from wci_ents[] under wci_lock
 * the first time @func is seen.
 */
static void wq_cpu_intensive_report(work_func_t func)
{
	struct wci_ent *ent;

restart:
	ent = wci_find_ent(func);
	if (ent) {
		u64 cnt;

		/*
		 * Start reporting from the warning_thresh and back off
		 * exponentially.
		 */
		cnt = atomic64_inc_return_relaxed(&ent->cnt);
		if (wq_cpu_intensive_warning_thresh &&
		    cnt >= wq_cpu_intensive_warning_thresh &&
		    is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh))
			printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
					ent->func, wq_cpu_intensive_thresh_us,
					atomic64_read(&ent->cnt));
		return;
	}

	/*
	 * @func is a new violation. Allocate a new entry for it. If wci_ents[]
	 * is exhausted, something went really wrong and we probably made enough
	 * noise already.
	 */
	if (wci_nr_ents >= WCI_MAX_ENTS)
		return;

	raw_spin_lock(&wci_lock);

	/* recheck under the lock */
	if (wci_nr_ents >= WCI_MAX_ENTS) {
		raw_spin_unlock(&wci_lock);
		return;
	}

	/* an entry for @func was added in the meantime; just use it */
	if (wci_find_ent(func)) {
		raw_spin_unlock(&wci_lock);
		goto restart;
	}

	ent = &wci_ents[wci_nr_ents++];
	ent->func = func;
	atomic64_set(&ent->cnt, 0);
	hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);

	raw_spin_unlock(&wci_lock);

	goto restart;
}

#else	/* CONFIG_WQ_CPU_INTENSIVE_REPORT */
static void wq_cpu_intensive_report(work_func_t func) {}
#endif	/* CONFIG_WQ_CPU_INTENSIVE_REPORT */

/**
 * wq_worker_running - a worker is running again
 * @task: task waking up
 *
 * This function is called when a worker returns from schedule()
 */
void wq_worker_running(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);

	/* nothing to undo unless wq_worker_sleeping() marked us sleeping */
	if (!READ_ONCE(worker->sleeping))
		return;

	/*
	 * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
	 * and the nr_running increment below, we may ruin the nr_running reset
	 * and leave with an unexpected pool->nr_running == 1 on the newly unbound
	 * pool. Protect against such race.
	 */
	preempt_disable();
	if (!(worker->flags & WORKER_NOT_RUNNING))
		worker->pool->nr_running++;
	preempt_enable();

	/*
	 * CPU intensive auto-detection cares about how long a work item hogged
	 * CPU without sleeping. Reset the starting timestamp on wakeup.
	 */
	worker->current_at = worker->task->se.sum_exec_runtime;

	WRITE_ONCE(worker->sleeping, 0);
}

/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
 * going to sleep.
 */
void wq_worker_sleeping(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);
	struct worker_pool *pool;

	/*
	 * Rescuers, which may not have all the fields set up like normal
	 * workers, also reach here, let's not access anything before
	 * checking NOT_RUNNING.
	 */
	if (worker->flags & WORKER_NOT_RUNNING)
		return;

	pool = worker->pool;

	/* Return if preempted before wq_worker_running() was reached */
	if (READ_ONCE(worker->sleeping))
		return;

	WRITE_ONCE(worker->sleeping, 1);
	raw_spin_lock_irq(&pool->lock);

	/*
	 * Recheck in case unbind_workers() preempted us. We don't
	 * want to decrement nr_running after the worker is unbound
	 * and nr_running has been reset.
	 */
	if (worker->flags & WORKER_NOT_RUNNING) {
		raw_spin_unlock_irq(&pool->lock);
		return;
	}

	pool->nr_running--;
	/* account the wakeup if kicking the pool woke up another worker */
	if (kick_pool(pool))
		worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;

	raw_spin_unlock_irq(&pool->lock);
}

/**
 * wq_worker_tick - a scheduler tick occurred while a kworker is running
 * @task: task currently running
 *
 * Called from sched_tick(). We're in the IRQ context and the current
 * worker's fields which follow the 'K' locking rule can be accessed safely.
 */
void wq_worker_tick(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);
	struct pool_workqueue *pwq = worker->current_pwq;
	struct worker_pool *pool = worker->pool;

	/* no current pwq, nothing to account */
	if (!pwq)
		return;

	pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC;

	/* a zero threshold skips the CPU_INTENSIVE auto-detection below */
	if (!wq_cpu_intensive_thresh_us)
		return;

	/*
	 * If the current worker is concurrency managed and hogged the CPU for
	 * longer than wq_cpu_intensive_thresh_us, it's automatically marked
	 * CPU_INTENSIVE to avoid stalling other concurrency-managed work items.
	 *
	 * Set @worker->sleeping means that @worker is in the process of
	 * switching out voluntarily and won't be contributing to
	 * @pool->nr_running until it wakes up. As wq_worker_sleeping() also
	 * decrements ->nr_running, setting CPU_INTENSIVE here can lead to
	 * double decrements. The task is releasing the CPU anyway. Let's skip.
	 * We probably want to make this prettier in the future.
	 */
	if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) ||
	    worker->task->se.sum_exec_runtime - worker->current_at <
	    wq_cpu_intensive_thresh_us * NSEC_PER_USEC)
		return;

	raw_spin_lock(&pool->lock);

	worker_set_flags(worker, WORKER_CPU_INTENSIVE);
	wq_cpu_intensive_report(worker->current_func);
	pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;

	if (kick_pool(pool))
		pwq->stats[PWQ_STAT_CM_WAKEUP]++;

	raw_spin_unlock(&pool->lock);
}

/**
 * wq_worker_last_func - retrieve worker's last work function
 * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed. This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
 * raw_spin_lock_irq(rq->lock)
 *
 * This function is called during schedule() when a kworker is going
 * to sleep. It's used by psi to identify aggregation workers during
 * dequeuing, to allow periodic aggregation to shut-off when that
 * worker is the last task in the system or cgroup to go to sleep.
 *
 * As this function doesn't involve any workqueue-related locking, it
 * only returns stable values when called from inside the scheduler's
 * queuing and dequeuing paths, when @task, which must be a kworker,
 * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
 * hasn't executed any work yet.
*/ work_func_t wq_worker_last_func(struct task_struct *task) { struct worker *worker = kthread_data(task); return worker->last_func; } /** * wq_node_nr_active - Determine wq_node_nr_active to use * @wq: workqueue of interest * @node: NUMA node, can be %NUMA_NO_NODE * * Determine wq_node_nr_active to use for @wq on @node. Returns: * * - %NULL for per-cpu workqueues as they don't need to use shared nr_active. * * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE. * * - Otherwise, node_nr_active[@node]. */ static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq, int node) { if (!(wq->flags & WQ_UNBOUND)) return NULL; if (node == NUMA_NO_NODE) node = nr_node_ids; return wq->node_nr_active[node]; } /** * wq_update_node_max_active - Update per-node max_actives to use * @wq: workqueue to update * @off_cpu: CPU that's going down, -1 if a CPU is not going down * * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is * distributed among nodes according to the proportions of numbers of online * cpus. The result is always between @wq->min_active and max_active. 
*/
static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu)
{
	struct cpumask *effective = unbound_effective_cpumask(wq);
	int min_active = READ_ONCE(wq->min_active);
	int max_active = READ_ONCE(wq->max_active);
	int total_cpus, node;

	lockdep_assert_held(&wq->mutex);

	if (!wq_topo_initialized)
		return;

	/* a departing CPU outside the effective mask doesn't change anything */
	if (off_cpu >= 0 && !cpumask_test_cpu(off_cpu, effective))
		off_cpu = -1;

	total_cpus = cpumask_weight_and(effective, cpu_online_mask);
	if (off_cpu >= 0)
		total_cpus--;

	/* If all CPUs of the wq get offline, use the default values */
	if (unlikely(!total_cpus)) {
		for_each_node(node)
			wq_node_nr_active(wq, node)->max = min_active;

		wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active;
		return;
	}

	for_each_node(node) {
		int node_cpus;

		node_cpus = cpumask_weight_and(effective, cpumask_of_node(node));
		if (off_cpu >= 0 && cpu_to_node(off_cpu) == node)
			node_cpus--;

		/* proportional share of max_active, kept within [min, max] */
		wq_node_nr_active(wq, node)->max =
			clamp(DIV_ROUND_UP(max_active * node_cpus, total_cpus),
			      min_active, max_active);
	}

	wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active;
}

/**
 * get_pwq - get an extra reference on the specified pool_workqueue
 * @pwq: pool_workqueue to get
 *
 * Obtain an extra reference on @pwq.  The caller should guarantee that
 * @pwq has positive refcnt and be holding the matching pool->lock.
 */
static void get_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	WARN_ON_ONCE(pwq->refcnt <= 0);
	pwq->refcnt++;
}

/**
 * put_pwq - put a pool_workqueue reference
 * @pwq: pool_workqueue to put
 *
 * Drop a reference of @pwq.  If its refcnt reaches zero, schedule its
 * destruction.  The caller should be holding the matching pool->lock.
 */
static void put_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	if (likely(--pwq->refcnt))
		return;
	/*
	 * @pwq can't be released under pool->lock, bounce to a dedicated
	 * kthread_worker to avoid A-A deadlocks.
	 */
	kthread_queue_work(pwq_release_worker, &pwq->release_work);
}

/**
 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
 * @pwq: pool_workqueue to put (can be %NULL)
 *
 * put_pwq() with locking.  This function also allows %NULL @pwq.
 */
static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
	if (pwq) {
		/*
		 * As both pwqs and pools are RCU protected, the
		 * following lock operations are safe.
		 */
		raw_spin_lock_irq(&pwq->pool->lock);
		put_pwq(pwq);
		raw_spin_unlock_irq(&pwq->pool->lock);
	}
}

/* True if @pwq has no active work and its inactive_works list is empty. */
static bool pwq_is_empty(struct pool_workqueue *pwq)
{
	return !pwq->nr_active && list_empty(&pwq->inactive_works);
}

/*
 * Move inactive @work and the works linked to it onto the pool's worklist
 * and clear its WORK_STRUCT_INACTIVE bit.  The pool's watchdog timestamp is
 * refreshed when the worklist was empty.
 */
static void __pwq_activate_work(struct pool_workqueue *pwq,
				struct work_struct *work)
{
	unsigned long *wdb = work_data_bits(work);

	WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
	trace_workqueue_activate_work(work);
	if (list_empty(&pwq->pool->worklist))
		pwq->pool->watchdog_ts = jiffies;
	move_linked_works(work, &pwq->pool->worklist, NULL);
	__clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
}

/*
 * Increment @nna->nr unless it has already reached @nna->max.  Returns %true
 * if the count was incremented, %false otherwise.
 */
static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
{
	int max = READ_ONCE(nna->max);
	int old = atomic_read(&nna->nr);

	/* a failed cmpxchg refreshes @old, so the limit is rechecked each round */
	do {
		if (old >= max)
			return false;
	} while (!atomic_try_cmpxchg_relaxed(&nna->nr, &old, old + 1));

	return true;
}

/**
 * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
 * @pwq: pool_workqueue of interest
 * @fill: max_active may have increased, try to increase concurrency level
 *
 * Try to increment nr_active for @pwq. Returns %true if an nr_active count is
 * successfully obtained. %false otherwise.
*/
static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
{
	struct workqueue_struct *wq = pwq->wq;
	struct worker_pool *pool = pwq->pool;
	struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
	bool obtained = false;

	lockdep_assert_held(&pool->lock);

	if (!nna) {
		/* BH or per-cpu workqueue, pwq->nr_active is sufficient */
		obtained = pwq->nr_active < READ_ONCE(wq->max_active);
		goto out;
	}

	/* a plugged pwq never obtains a count, see unplug_oldest_pwq() */
	if (unlikely(pwq->plugged))
		return false;

	/*
	 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
	 * already waiting on $nna, pwq_dec_nr_active() will maintain the
	 * concurrency level. Don't jump the line.
	 *
	 * We need to ignore the pending test after max_active has increased as
	 * pwq_dec_nr_active() can only maintain the concurrency level but not
	 * increase it. This is indicated by @fill.
	 */
	if (!list_empty(&pwq->pending_node) && likely(!fill))
		goto out;

	/* fast path: lockless attempt */
	obtained = tryinc_node_nr_active(nna);
	if (obtained)
		goto out;

	/*
	 * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs
	 * and try again. The smp_mb() is paired with the implied memory barrier
	 * of atomic_dec_return() in pwq_dec_nr_active() to ensure that either
	 * we see the decremented $nna->nr or they see non-empty
	 * $nna->pending_pwqs.
	 */
	raw_spin_lock(&nna->lock);

	if (list_empty(&pwq->pending_node))
		list_add_tail(&pwq->pending_node, &nna->pending_pwqs);
	else if (likely(!fill))
		goto out_unlock;

	smp_mb();

	obtained = tryinc_node_nr_active(nna);

	/*
	 * If @fill, @pwq might have already been pending. Being spuriously
	 * pending in cold paths doesn't affect anything. Let's leave it be.
	 */
	if (obtained && likely(!fill))
		list_del_init(&pwq->pending_node);

out_unlock:
	raw_spin_unlock(&nna->lock);
out:
	/* every successful path accounts the count on @pwq itself too */
	if (obtained)
		pwq->nr_active++;
	return obtained;
}

/**
 * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
 * @pwq: pool_workqueue of interest
 * @fill: max_active may have increased, try to increase concurrency level
 *
 * Activate the first inactive work item of @pwq if available and allowed by
 * max_active limit.
 *
 * Returns %true if an inactive work item has been activated. %false if no
 * inactive work item is found or max_active limit is reached.
 */
static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
{
	struct work_struct *work =
		list_first_entry_or_null(&pwq->inactive_works,
					 struct work_struct, entry);

	/* only ask for an nr_active count if there is something to activate */
	if (work && pwq_tryinc_nr_active(pwq, fill)) {
		__pwq_activate_work(pwq, work);
		return true;
	} else {
		return false;
	}
}

/**
 * unplug_oldest_pwq - unplug the oldest pool_workqueue
 * @wq: workqueue_struct where its oldest pwq is to be unplugged
 *
 * This function should only be called for ordered workqueues where only the
 * oldest pwq is unplugged, the others are plugged to suspend execution to
 * ensure proper work item ordering::
 *
 *    dfl_pwq --------------+     [P] - plugged
 *                          |
 *                          v
 *    pwqs -> A -> B [P] -> C [P] (newest)
 *            |    |        |
 *            1    3        5
 *            |    |        |
 *            2    4        6
 *
 * When the oldest pwq is drained and removed, this function should be called
 * to unplug the next oldest one to start its work item execution. Note that
 * pwq's are linked into wq->pwqs with the oldest first, so the first one in
 * the list is the oldest.
*/ static void unplug_oldest_pwq(struct workqueue_struct *wq) { struct pool_workqueue *pwq; lockdep_assert_held(&wq->mutex); /* Caller should make sure that pwqs isn't empty before calling */ pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue, pwqs_node); raw_spin_lock_irq(&pwq->pool->lock); if (pwq->plugged) { pwq->plugged = false; if (pwq_activate_first_inactive(pwq, true)) kick_pool(pwq->pool); } raw_spin_unlock_irq(&pwq->pool->lock); } /** * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active * @nna: wq_node_nr_active to activate a pending pwq for * @caller_pool: worker_pool the caller is locking * * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked. * @caller_pool may be unlocked and relocked to lock other worker_pools. */ static void node_activate_pending_pwq(struct wq_node_nr_active *nna, struct worker_pool *caller_pool) { struct worker_pool *locked_pool = caller_pool; struct pool_workqueue *pwq; struct work_struct *work; lockdep_assert_held(&caller_pool->lock); raw_spin_lock(&nna->lock); retry: pwq = list_first_entry_or_null(&nna->pending_pwqs, struct pool_workqueue, pending_node); if (!pwq) goto out_unlock; /* * If @pwq is for a different pool than @locked_pool, we need to lock * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock * / lock dance. For that, we also need to release @nna->lock as it's * nested inside pool locks. */ if (pwq->pool != locked_pool) { raw_spin_unlock(&locked_pool->lock); locked_pool = pwq->pool; if (!raw_spin_trylock(&locked_pool->lock)) { raw_spin_unlock(&nna->lock); raw_spin_lock(&locked_pool->lock); raw_spin_lock(&nna->lock); goto retry; } } /* * $pwq may not have any inactive work items due to e.g. cancellations. * Drop it from pending_pwqs and see if there's another one. 
*/ work = list_first_entry_or_null(&pwq->inactive_works, struct work_struct, entry); if (!work) { list_del_init(&pwq->pending_node); goto retry; } /* * Acquire an nr_active count and activate the inactive work item. If * $pwq still has inactive work items, rotate it to the end of the * pending_pwqs so that we round-robin through them. This means that * inactive work items are not activated in queueing order which is fine * given that there has never been any ordering across different pwqs. */ if (likely(tryinc_node_nr_active(nna))) { pwq->nr_active++; __pwq_activate_work(pwq, work); if (list_empty(&pwq->inactive_works)) list_del_init(&pwq->pending_node); else list_move_tail(&pwq->pending_node, &nna->pending_pwqs); /* if activating a foreign pool, make sure it's running */ if (pwq->pool != caller_pool) kick_pool(pwq->pool); } out_unlock: raw_spin_unlock(&nna->lock); if (locked_pool != caller_pool) { raw_spin_unlock(&locked_pool->lock); raw_spin_lock(&caller_pool->lock); } } /** * pwq_dec_nr_active - Retire an active count * @pwq: pool_workqueue of interest * * Decrement @pwq's nr_active and try to activate the first inactive work item. * For unbound workqueues, this function may temporarily drop @pwq->pool->lock. */ static void pwq_dec_nr_active(struct pool_workqueue *pwq) { struct worker_pool *pool = pwq->pool; struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); lockdep_assert_held(&pool->lock); /* * @pwq->nr_active should be decremented for both percpu and unbound * workqueues. */ pwq->nr_active--; /* * For a percpu workqueue, it's simple. Just need to kick the first * inactive work item on @pwq itself. */ if (!nna) { pwq_activate_first_inactive(pwq, false); return; } /* * If @pwq is for an unbound workqueue, it's more complicated because * multiple pwqs and pools may be sharing the nr_active count. When a * pwq needs to wait for an nr_active count, it puts itself on * $nna->pending_pwqs. 
The following atomic_dec_return()'s implied * memory barrier is paired with smp_mb() in pwq_tryinc_nr_active() to * guarantee that either we see non-empty pending_pwqs or they see * decremented $nna->nr. * * $nna->max may change as CPUs come online/offline and @pwq->wq's * max_active gets updated. However, it is guaranteed to be equal to or * larger than @pwq->wq->min_active which is above zero unless freezing. * This maintains the forward progress guarantee. */ if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) return; if (!list_empty(&nna->pending_pwqs)) node_activate_pending_pwq(nna, pool); } /** * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight * @pwq: pwq of interest * @work_data: work_data of work which left the queue * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its pwq and handle workqueue flushing. * * NOTE: * For unbound workqueues, this function may temporarily drop @pwq->pool->lock * and thus should be called after all other state updates for the in-flight * work item is complete. * * CONTEXT: * raw_spin_lock_irq(pool->lock). */ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data) { int color = get_work_color(work_data); if (!(work_data & WORK_STRUCT_INACTIVE)) pwq_dec_nr_active(pwq); pwq->nr_in_flight[color]--; /* is flush in progress and are we at the flushing tip? */ if (likely(pwq->flush_color != color)) goto out_put; /* are there still in-flight works? */ if (pwq->nr_in_flight[color]) goto out_put; /* this pwq is done, clear flush_color */ pwq->flush_color = -1; /* * If this was the last pwq, wake up the first flusher. It * will handle the rest. 
*/ if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) complete(&pwq->wq->first_flusher->done); out_put: put_pwq(pwq); } /** * try_to_grab_pending - steal work item from worklist and disable irq * @work: work item to steal * @cflags: %WORK_CANCEL_ flags * @irq_flags: place to store irq state * * Try to grab PENDING bit of @work. This function can handle @work in any * stable state - idle, on timer or on worklist. * * Return: * * ======== ================================================================ * 1 if @work was pending and we successfully stole PENDING * 0 if @work was idle and we claimed PENDING * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry * ======== ================================================================ * * Note: * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting * interrupted while holding PENDING and @work off queue, irq must be * disabled on entry. This, combined with delayed_work->timer being * irqsafe, ensures that we return -EAGAIN for finite short period of time. * * On successful return, >= 0, irq is disabled and the caller is * responsible for releasing it using local_irq_restore(*@irq_flags). * * This function is safe to call from any context including IRQ handler. */ static int try_to_grab_pending(struct work_struct *work, u32 cflags, unsigned long *irq_flags) { struct worker_pool *pool; struct pool_workqueue *pwq; local_irq_save(*irq_flags); /* try to steal the timer if it exists */ if (cflags & WORK_CANCEL_DELAYED) { struct delayed_work *dwork = to_delayed_work(work); /* * dwork->timer is irqsafe. If timer_delete() fails, it's * guaranteed that the timer is not queued anywhere and not * running on the local CPU. */ if (likely(timer_delete(&dwork->timer))) return 1; } /* try to claim PENDING the normal way */ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; rcu_read_lock(); /* * The queueing is in progress, or it is already queued. 
Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. */ pool = get_work_pool(work); if (!pool) goto fail; raw_spin_lock(&pool->lock); /* * work->data is guaranteed to point to pwq only while the work * item is queued on pwq->wq, and both updating work->data to point * to pwq on queueing and to pool on dequeueing are done under * pwq->pool->lock. This in turn guarantees that, if work->data * points to pwq which is associated with a locked pool, the work * item is currently queued on that pool. */ pwq = get_work_pwq(work); if (pwq && pwq->pool == pool) { unsigned long work_data = *work_data_bits(work); debug_work_deactivate(work); /* * A cancelable inactive work item must be in the * pwq->inactive_works since a queued barrier can't be * canceled (see the comments in insert_wq_barrier()). * * An inactive work item cannot be deleted directly because * it might have linked barrier work items which, if left * on the inactive_works list, will confuse pwq->nr_active * management later on and cause stall. Move the linked * barrier work items to the worklist when deleting the grabbed * item. Also keep WORK_STRUCT_INACTIVE in work_data, so that * it doesn't participate in nr_active management in later * pwq_dec_nr_in_flight(). */ if (work_data & WORK_STRUCT_INACTIVE) move_linked_works(work, &pwq->pool->worklist, NULL); list_del_init(&work->entry); /* * work->data points to pwq iff queued. Let's point to pool. As * this destroys work->data needed by the next step, stash it. 
*/ set_work_pool_and_keep_pending(work, pool->id, pool_offq_flags(pool)); /* must be the last step, see the function comment */ pwq_dec_nr_in_flight(pwq, work_data); raw_spin_unlock(&pool->lock); rcu_read_unlock(); return 1; } raw_spin_unlock(&pool->lock); fail: rcu_read_unlock(); local_irq_restore(*irq_flags); return -EAGAIN; } /** * work_grab_pending - steal work item from worklist and disable irq * @work: work item to steal * @cflags: %WORK_CANCEL_ flags * @irq_flags: place to store IRQ state * * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer * or on worklist. * * Can be called from any context. IRQ is disabled on return with IRQ state * stored in *@irq_flags. The caller is responsible for re-enabling it using * local_irq_restore(). * * Returns %true if @work was pending. %false if idle. */ static bool work_grab_pending(struct work_struct *work, u32 cflags, unsigned long *irq_flags) { int ret; while (true) { ret = try_to_grab_pending(work, cflags, irq_flags); if (ret >= 0) return ret; cpu_relax(); } } /** * insert_work - insert a work into a pool * @pwq: pwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to * work_struct flags. * * CONTEXT: * raw_spin_lock_irq(pool->lock). */ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct list_head *head, unsigned int extra_flags) { debug_work_activate(work); /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); get_pwq(pwq); } /* * Test whether @work is being queued from another work executing on the * same workqueue. 
*/ static bool is_chained_work(struct workqueue_struct *wq) { struct worker *worker; worker = current_wq_worker(); /* * Return %true iff I'm a worker executing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ return worker && worker->current_pwq->wq == wq; } /* * When queueing an unbound work item to a wq, prefer local CPU if allowed * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to * avoid perturbing sensitive tasks. */ static int wq_select_unbound_cpu(int cpu) { int new_cpu; if (likely(!wq_debug_force_rr_cpu)) { if (cpumask_test_cpu(cpu, wq_unbound_cpumask)) return cpu; } else { pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); } new_cpu = __this_cpu_read(wq_rr_cpu_last); new_cpu = cpumask_next_and_wrap(new_cpu, wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) return cpu; __this_cpu_write(wq_rr_cpu_last, new_cpu); return new_cpu; } static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct pool_workqueue *pwq; struct worker_pool *last_pool, *pool; unsigned int work_flags; unsigned int req_cpu = cpu; /* * While a work item is PENDING && off queue, a task trying to * steal the PENDING will busy-loop waiting for it to either get * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ lockdep_assert_irqs_disabled(); /* * For a draining wq, only works from the same workqueue are * allowed. The __WQ_DESTROYING helps to spot the issue that * queues a new work item to a wq after destroy_workqueue(wq). 
*/ if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n", work->func, wq->name))) { return; } rcu_read_lock(); retry: /* pwq which will be used unless @work is executing elsewhere */ if (req_cpu == WORK_CPU_UNBOUND) { if (wq->flags & WQ_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); else cpu = raw_smp_processor_id(); } pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu)); pool = pwq->pool; /* * If @work was previously on a different pool, it might still be * running there, in which case the work needs to be queued on that * pool to guarantee non-reentrancy. * * For ordered workqueue, work items must be queued on the newest pwq * for accurate order management. Guaranteed order also guarantees * non-reentrancy. See the comments above unplug_oldest_pwq(). */ last_pool = get_work_pool(work); if (last_pool && last_pool != pool && !(wq->flags & __WQ_ORDERED)) { struct worker *worker; raw_spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); if (worker && worker->current_pwq->wq == wq) { pwq = worker->current_pwq; pool = pwq->pool; WARN_ON_ONCE(pool != last_pool); } else { /* meh... not running there, queue here */ raw_spin_unlock(&last_pool->lock); raw_spin_lock(&pool->lock); } } else { raw_spin_lock(&pool->lock); } /* * pwq is determined and locked. For unbound pools, we could have raced * with pwq release and it could already be dead. If its refcnt is zero, * repeat pwq selection. Note that unbound pwqs never die without * another pwq replacing it in cpu_pwq or while work items are executing * on it, so the retrying is guaranteed to make forward-progress. 
*/ if (unlikely(!pwq->refcnt)) { if (wq->flags & WQ_UNBOUND) { raw_spin_unlock(&pool->lock); cpu_relax(); goto retry; } /* oops */ WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", wq->name, cpu); } /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); if (WARN_ON(!list_empty(&work->entry))) goto out; pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); /* * Limit the number of concurrently active work items to max_active. * @work must also queue behind existing inactive work items to maintain * ordering when max_active changes. See wq_adjust_max_active(). */ if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { if (list_empty(&pool->worklist)) pool->watchdog_ts = jiffies; trace_workqueue_activate_work(work); insert_work(pwq, work, &pool->worklist, work_flags); kick_pool(pool); } else { work_flags |= WORK_STRUCT_INACTIVE; insert_work(pwq, work, &pwq->inactive_works, work_flags); } out: raw_spin_unlock(&pool->lock); rcu_read_unlock(); } static bool clear_pending_if_disabled(struct work_struct *work) { unsigned long data = *work_data_bits(work); struct work_offq_data offqd; if (likely((data & WORK_STRUCT_PWQ) || !(data & WORK_OFFQ_DISABLE_MASK))) return false; work_offqd_unpack(&offqd, data); set_work_pool_and_clear_pending(work, offqd.pool_id, work_offqd_pack_flags(&offqd)); return true; } /** * queue_work_on - queue work on specific cpu * @cpu: CPU number to execute work on * @wq: workqueue to use * @work: work to queue * * We queue the work to a specific CPU, the caller must ensure it * can't go away. Callers that fail to ensure that the specified * CPU cannot go away will execute on a randomly chosen CPU. * But note well that callers specifying a CPU that never has been * online will get a splat. * * Return: %false if @work was already on a queue, %true otherwise. 
*/ bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) { bool ret = false; unsigned long irq_flags; local_irq_save(irq_flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && !clear_pending_if_disabled(work)) { __queue_work(cpu, wq, work); ret = true; } local_irq_restore(irq_flags); return ret; } EXPORT_SYMBOL(queue_work_on); /** * select_numa_node_cpu - Select a CPU based on NUMA node * @node: NUMA node ID that we want to select a CPU from * * This function will attempt to find a "random" cpu available on a given * node. If there are no CPUs available on the given node it will return * WORK_CPU_UNBOUND indicating that we should just schedule to any * available CPU if we need to schedule this work. */ static int select_numa_node_cpu(int node) { int cpu; /* Delay binding to CPU if node is not valid or online */ if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) return WORK_CPU_UNBOUND; /* Use local node/cpu if we are already there */ cpu = raw_smp_processor_id(); if (node == cpu_to_node(cpu)) return cpu; /* Use "random" otherwise know as "first" online CPU of node */ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); /* If CPU is valid return that, otherwise just defer */ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND; } /** * queue_work_node - queue work on a "random" cpu for a given NUMA node * @node: NUMA node that we are targeting the work for * @wq: workqueue to use * @work: work to queue * * We queue the work to a "random" CPU within a given NUMA node. The basic * idea here is to provide a way to somehow associate work with a given * NUMA node. * * This function will only make a best effort attempt at getting this onto * the right NUMA node. If no node is requested or the requested node is * offline then we just fall back to standard queue_work behavior. 
*
 * Currently the "random" CPU ends up being the first available CPU in the
 * intersection of cpu_online_mask and the cpumask of the node, unless we
 * are running on the node. In that case we just use the current CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_node(int node, struct workqueue_struct *wq,
		     struct work_struct *work)
{
	unsigned long irq_flags;
	bool ret = false;

	/*
	 * This current implementation is specific to unbound workqueues.
	 * Specifically we only return the first available CPU for a given
	 * node instead of cycling through individual CPUs within the node.
	 *
	 * If this is used with a per-cpu workqueue then the CPU selection
	 * logic (select_numa_node_cpu() below) would need to be updated to
	 * allow for some round robin type logic.
	 */
	WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));

	local_irq_save(irq_flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) &&
	    !clear_pending_if_disabled(work)) {
		int cpu = select_numa_node_cpu(node);

		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(irq_flags);
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_node);

/*
 * Timer callback of a delayed_work: queue the embedded work item on the
 * workqueue and CPU recorded in the delayed_work.
 */
void delayed_work_timer_fn(struct timer_list *t)
{
	struct delayed_work *dwork = timer_container_of(dwork, t, timer);

	/* should have been called from irqsafe timer with irq already off */
	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
}
EXPORT_SYMBOL(delayed_work_timer_fn);

/*
 * Queue @dwork->work immediately if @delay is zero, otherwise record @wq and
 * @cpu in @dwork and arm @dwork->timer to expire @delay jiffies from now.
 */
static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
				struct delayed_work *dwork, unsigned long delay)
{
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	WARN_ON_ONCE(!wq);
	WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
	WARN_ON_ONCE(timer_pending(timer));
	WARN_ON_ONCE(!list_empty(&work->entry));

	/*
	 * If @delay is 0, queue @dwork->work immediately.  This is for
	 * both optimization and correctness.  The earliest @timer can
	 * expire is on the closest next tick and delayed_work users depend
	 * on that there's no such delay when @delay is 0.
	 */
	if (!delay) {
		__queue_work(cpu, wq, &dwork->work);
		return;
	}

	WARN_ON_ONCE(cpu != WORK_CPU_UNBOUND && !cpu_online(cpu));
	dwork->wq = wq;
	dwork->cpu = cpu;
	timer->expires = jiffies + delay;

	/*
	 * Note that @cpu is reused below as the CPU the timer is armed on;
	 * the CPU the work will be queued on was saved in dwork->cpu above.
	 */
	if (housekeeping_enabled(HK_TYPE_TIMER)) {
		/* If the current cpu is a housekeeping cpu, use it. */
		cpu = smp_processor_id();
		if (!housekeeping_test_cpu(cpu, HK_TYPE_TIMER))
			cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
		add_timer_on(timer, cpu);
	} else {
		if (likely(cpu == WORK_CPU_UNBOUND))
			add_timer_global(timer);
		else
			add_timer_on(timer, cpu);
	}
}

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * We queue the delayed_work to a specific CPU, for non-zero delays the
 * caller must ensure it is online and can't go away. Callers that fail
 * to ensure this, may get @dwork->timer queued to an offlined CPU and
 * this will prevent queueing of @dwork->work unless the offlined CPU
 * becomes online again.
 *
 * Return: %false if @dwork was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
*/
bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			   struct delayed_work *dwork, unsigned long delay)
{
	unsigned long irq_flags;
	bool queued = false;

	/* PENDING must be claimed and the work queued with IRQs off */
	local_irq_save(irq_flags);

	/* already pending: leave the existing queueing alone */
	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
			     work_data_bits(&dwork->work)))
		goto out;

	/* PENDING has been cleared again by the helper, don't queue */
	if (clear_pending_if_disabled(&dwork->work))
		goto out;

	__queue_delayed_work(cpu, wq, dwork, delay);
	queued = true;
out:
	local_irq_restore(irq_flags);
	return queued;
}
EXPORT_SYMBOL(queue_delayed_work_on);

/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
 * modify @dwork's timer so that it expires after @delay.  If @delay is
 * zero, @work is guaranteed to be scheduled immediately regardless of its
 * current state.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
*/ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { unsigned long irq_flags; bool ret; ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); if (!clear_pending_if_disabled(&dwork->work)) __queue_delayed_work(cpu, wq, dwork, delay); local_irq_restore(irq_flags); return ret; } EXPORT_SYMBOL_GPL(mod_delayed_work_on); static void rcu_work_rcufn(struct rcu_head *rcu) { struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu); /* read the comment in __queue_work() */ local_irq_disable(); __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); local_irq_enable(); } /** * queue_rcu_work - queue work after a RCU grace period * @wq: workqueue to use * @rwork: work to queue * * Return: %false if @rwork was already pending, %true otherwise. Note * that a full RCU grace period is guaranteed only after a %true return. * While @rwork is guaranteed to be executed after a %false return, the * execution may happen before a full RCU grace period has passed. */ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) { struct work_struct *work = &rwork->work; /* * rcu_work can't be canceled or disabled. Warn if the user reached * inside @rwork and disabled the inner work. 
*/ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && !WARN_ON_ONCE(clear_pending_if_disabled(work))) { rwork->wq = wq; call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); return true; } return false; } EXPORT_SYMBOL(queue_rcu_work); static struct worker *alloc_worker(int node) { struct worker *worker; worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node); if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); INIT_LIST_HEAD(&worker->node); /* on creation a worker is in !idle && prep state */ worker->flags = WORKER_PREP; } return worker; } static cpumask_t *pool_allowed_cpus(struct worker_pool *pool) { if (pool->cpu < 0 && pool->attrs->affn_strict) return pool->attrs->__pod_cpumask; else return pool->attrs->cpumask; } /** * worker_attach_to_pool() - attach a worker to a pool * @worker: worker to be attached * @pool: the target pool * * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and * cpu-binding of @worker are kept coordinated with the pool across * cpu-[un]hotplugs. */ static void worker_attach_to_pool(struct worker *worker, struct worker_pool *pool) { mutex_lock(&wq_pool_attach_mutex); /* * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains stable * across this function. See the comments above the flag definition for * details. BH workers are, while per-CPU, always DISASSOCIATED. 
*/ if (pool->flags & POOL_DISASSOCIATED) { worker->flags |= WORKER_UNBOUND; } else { WARN_ON_ONCE(pool->flags & POOL_BH); kthread_set_per_cpu(worker->task, pool->cpu); } if (worker->rescue_wq) set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)); list_add_tail(&worker->node, &pool->workers); worker->pool = pool; mutex_unlock(&wq_pool_attach_mutex); } static void unbind_worker(struct worker *worker) { lockdep_assert_held(&wq_pool_attach_mutex); kthread_set_per_cpu(worker->task, -1); if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask)) WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0); else WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); } static void detach_worker(struct worker *worker) { lockdep_assert_held(&wq_pool_attach_mutex); unbind_worker(worker); list_del(&worker->node); } /** * worker_detach_from_pool() - detach a worker from its pool * @worker: worker which is attached to its pool * * Undo the attaching which had been done in worker_attach_to_pool(). The * caller worker shouldn't access to the pool after detached except it has * other reference to the pool. */ static void worker_detach_from_pool(struct worker *worker) { struct worker_pool *pool = worker->pool; /* there is one permanent BH worker per CPU which should never detach */ WARN_ON_ONCE(pool->flags & POOL_BH); mutex_lock(&wq_pool_attach_mutex); detach_worker(worker); worker->pool = NULL; mutex_unlock(&wq_pool_attach_mutex); /* clear leftover flags without pool->lock after it is detached */ worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); } static int format_worker_id(char *buf, size_t size, struct worker *worker, struct worker_pool *pool) { if (worker->rescue_wq) return scnprintf(buf, size, "kworker/R-%s", worker->rescue_wq->name); if (pool) { if (pool->cpu >= 0) return scnprintf(buf, size, "kworker/%d:%d%s", pool->cpu, worker->id, pool->attrs->nice < 0 ? 
"H" : ""); else return scnprintf(buf, size, "kworker/u%d:%d", pool->id, worker->id); } else { return scnprintf(buf, size, "kworker/dying"); } } /** * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to * * Create and start a new worker which is attached to @pool. * * CONTEXT: * Might sleep. Does GFP_KERNEL allocations. * * Return: * Pointer to the newly created worker. */ static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker; int id; /* ID is needed to determine kthread name */ id = ida_alloc(&pool->worker_ida, GFP_KERNEL); if (id < 0) { pr_err_once("workqueue: Failed to allocate a worker ID: %pe\n", ERR_PTR(id)); return NULL; } worker = alloc_worker(pool->node); if (!worker) { pr_err_once("workqueue: Failed to allocate a worker\n"); goto fail; } worker->id = id; if (!(pool->flags & POOL_BH)) { char id_buf[WORKER_ID_LEN]; format_worker_id(id_buf, sizeof(id_buf), worker, pool); worker->task = kthread_create_on_node(worker_thread, worker, pool->node, "%s", id_buf); if (IS_ERR(worker->task)) { if (PTR_ERR(worker->task) == -EINTR) { pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n", id_buf); } else { pr_err_once("workqueue: Failed to create a worker thread: %pe", worker->task); } goto fail; } set_user_nice(worker->task, pool->attrs->nice); kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); } /* successful, attach the worker to the pool */ worker_attach_to_pool(worker, pool); /* start the newly created worker */ raw_spin_lock_irq(&pool->lock); worker->pool->nr_workers++; worker_enter_idle(worker); /* * @worker is waiting on a completion in kthread() and will trigger hung * check if not woken up soon. As kick_pool() is noop if @pool is empty, * wake it up explicitly. 
*/ if (worker->task) wake_up_process(worker->task); raw_spin_unlock_irq(&pool->lock); return worker; fail: ida_free(&pool->worker_ida, id); kfree(worker); return NULL; } static void detach_dying_workers(struct list_head *cull_list) { struct worker *worker; list_for_each_entry(worker, cull_list, entry) detach_worker(worker); } static void reap_dying_workers(struct list_head *cull_list) { struct worker *worker, *tmp; list_for_each_entry_safe(worker, tmp, cull_list, entry) { list_del_init(&worker->entry); kthread_stop_put(worker->task); kfree(worker); } } /** * set_worker_dying - Tag a worker for destruction * @worker: worker to be destroyed * @list: transfer worker away from its pool->idle_list and into list * * Tag @worker for destruction and adjust @pool stats accordingly. The worker * should be idle. * * CONTEXT: * raw_spin_lock_irq(pool->lock). */ static void set_worker_dying(struct worker *worker, struct list_head *list) { struct worker_pool *pool = worker->pool; lockdep_assert_held(&pool->lock); lockdep_assert_held(&wq_pool_attach_mutex); /* sanity check frenzy */ if (WARN_ON(worker->current_work) || WARN_ON(!list_empty(&worker->scheduled)) || WARN_ON(!(worker->flags & WORKER_IDLE))) return; pool->nr_workers--; pool->nr_idle--; worker->flags |= WORKER_DIE; list_move(&worker->entry, list); /* get an extra task struct reference for later kthread_stop_put() */ get_task_struct(worker->task); } /** * idle_worker_timeout - check if some idle workers can now be deleted. * @t: The pool's idle_timer that just expired * * The timer is armed in worker_enter_idle(). Note that it isn't disarmed in * worker_leave_idle(), as a worker flicking between idle and active while its * pool is at the too_many_workers() tipping point would cause too much timer * housekeeping overhead. Since IDLE_WORKER_TIMEOUT is long enough, we just let * it expire and re-evaluate things from there. 
*/
static void idle_worker_timeout(struct timer_list *t)
{
	struct worker_pool *pool = timer_container_of(pool, t, idle_timer);
	bool do_cull = false;

	/* a cull is already queued, nothing more to decide here */
	if (work_pending(&pool->idle_cull_work))
		return;

	raw_spin_lock_irq(&pool->lock);

	if (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		/* idle_list is kept in LIFO order, check the last one */
		worker = list_last_entry(&pool->idle_list, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;
		do_cull = !time_before(jiffies, expires);

		/* not expired yet, re-arm the timer for when it will be */
		if (!do_cull)
			mod_timer(&pool->idle_timer, expires);
	}
	raw_spin_unlock_irq(&pool->lock);

	/* the actual culling is deferred to idle_cull_fn() */
	if (do_cull)
		queue_work(system_unbound_wq, &pool->idle_cull_work);
}

/**
 * idle_cull_fn - cull workers that have been idle for too long.
 * @work: the pool's work for handling these idle workers
 *
 * This goes through a pool's idle workers and gets rid of those that have been
 * idle for at least IDLE_WORKER_TIMEOUT.
 *
 * We don't want to disturb isolated CPUs because of a pcpu kworker being
 * culled, so this also resets worker affinity. This requires a sleepable
 * context, hence the split between timer callback and work item.
 */
static void idle_cull_fn(struct work_struct *work)
{
	struct worker_pool *pool = container_of(work, struct worker_pool, idle_cull_work);
	LIST_HEAD(cull_list);

	/*
	 * Grabbing wq_pool_attach_mutex here ensures an already-running worker
	 * cannot proceed beyond set_pf_worker() in its self-destruct path.
	 * This is required as a previously-preempted worker could run after
	 * set_worker_dying() has happened but before detach_dying_workers() did.
	 */
	mutex_lock(&wq_pool_attach_mutex);
	raw_spin_lock_irq(&pool->lock);

	while (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		/* same pick as in idle_worker_timeout(): the tail of idle_list */
		worker = list_last_entry(&pool->idle_list, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires)) {
			mod_timer(&pool->idle_timer, expires);
			break;
		}

		set_worker_dying(worker, &cull_list);
	}

	raw_spin_unlock_irq(&pool->lock);
	detach_dying_workers(&cull_list);
	mutex_unlock(&wq_pool_attach_mutex);

	/* stopping and freeing the workers happens with both locks dropped */
	reap_dying_workers(&cull_list);
}

/*
 * Ask the rescuer of @work's workqueue for help, if it has one: queue the
 * work's pwq on wq->maydays (unless it is already queued), wake the rescuer
 * task and bump the PWQ_STAT_MAYDAY counter.
 *
 * Must be called with wq_mayday_lock held.
 */
static void send_mayday(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq_mayday_lock);

	if (!wq->rescuer)
		return;

	/* mayday mayday mayday */
	if (list_empty(&pwq->mayday_node)) {
		/*
		 * If @pwq is for an unbound wq, its base ref may be put at
		 * any time due to an attribute change. Pin @pwq until the
		 * rescuer is done with it.
		 */
		get_pwq(pwq);
		list_add_tail(&pwq->mayday_node, &wq->maydays);
		wake_up_process(wq->rescuer->task);
		pwq->stats[PWQ_STAT_MAYDAY]++;
	}
}

/*
 * Callback of the pool's mayday_timer. If the pool still needs a new worker,
 * send a mayday for every work item on its worklist, then re-arm the timer
 * MAYDAY_INTERVAL from now.
 */
static void pool_mayday_timeout(struct timer_list *t)
{
	struct worker_pool *pool = timer_container_of(pool, t, mayday_timer);
	struct work_struct *work;

	raw_spin_lock_irq(&pool->lock);
	raw_spin_lock(&wq_mayday_lock);		/* for wq->maydays */

	if (need_to_create_worker(pool)) {
		/*
		 * We've been trying to create a new worker but
		 * haven't been successful. We might be hitting an
		 * allocation deadlock. Send distress signals to
		 * rescuers.
		 */
		list_for_each_entry(work, &pool->worklist, entry)
			send_mayday(work);
	}

	raw_spin_unlock(&wq_mayday_lock);
	raw_spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create a new worker for @pool if necessary. @pool is guaranteed to
 * have at least one idle worker on return from this function.
If
 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
 * sent to all rescuers with works scheduled on @pool to resolve
 * possible allocation deadlock.
 *
 * On return, need_to_create_worker() is guaranteed to be %false and
 * may_start_working() %true.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times. Does GFP_KERNEL allocations. Called only from
 * manager.
 */
static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
	do {
		raw_spin_unlock_irq(&pool->lock);

		/* arm the distress timer: no progress within the initial timeout means mayday */
		mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

		/*
		 * Keep trying for as long as creation fails and a worker is
		 * still needed, cooling down between attempts.
		 */
		while (!create_worker(pool) && need_to_create_worker(pool)) {
			schedule_timeout_interruptible(CREATE_COOLDOWN);

			if (!need_to_create_worker(pool))
				break;
		}

		timer_delete_sync(&pool->mayday_timer);
		raw_spin_lock_irq(&pool->lock);

		/*
		 * Re-evaluate under the lock even after a successful creation:
		 * @pool->lock was dropped in the meantime and the new worker
		 * might have already become busy.
		 */
	} while (need_to_create_worker(pool));
}

/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and manage the worker pool @worker belongs
 * to. At any given time, there can be only zero or one manager per
 * pool. The exclusion is handled automatically by this function.
 *
 * The caller can safely start processing works on false return. On
 * true return, it's guaranteed that need_to_create_worker() is false
 * and may_start_working() is true.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times. Does GFP_KERNEL allocations.
 *
 * Return:
 * %false if the pool doesn't need management and the caller can safely
 * start processing works, %true if management function was performed and
 * the conditions that the caller verified before calling the function may
 * no longer be true.
*/
static bool manage_workers(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	/* somebody else already holds the manager role */
	if (pool->flags & POOL_MANAGER_ACTIVE)
		return false;

	pool->flags |= POOL_MANAGER_ACTIVE;
	pool->manager = worker;

	maybe_create_worker(pool);

	pool->manager = NULL;
	pool->flags &= ~POOL_MANAGER_ACTIVE;
	/* let whoever waits on manager_wait know the role has been released */
	rcuwait_wake_up(&manager_wait);
	return true;
}

/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work. This function contains all the logic necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing. As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
 */
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
__acquires(&pool->lock)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct worker_pool *pool = worker->pool;
	unsigned long work_data;
	int lockdep_start_depth, rcu_start_depth;
	bool bh_draining = pool->flags & POOL_BH_DRAINING;
#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the struct work_struct from
	 * inside the function that is called from it, this we need to
	 * take into account for lockdep too. To avoid bogus "held
	 * lock freed" warnings as well as problems when looking into
	 * work->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif
	/* ensure we're on the correct CPU */
	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     raw_smp_processor_id() != pool->cpu);

	/* claim and dequeue */
	debug_work_deactivate(work);
	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
	worker->current_work = work;
	worker->current_func = work->func;
	worker->current_pwq = pwq;
	/* only sample the runtime when the worker has a task to sample it from */
	if (worker->task)
		worker->current_at = worker->task->se.sum_exec_runtime;
	/* snapshot the data word now; it is passed to pwq_dec_nr_in_flight() at the end */
	work_data = *work_data_bits(work);
	worker->current_color = get_work_color(work_data);

	/*
	 * Record wq name for cmdline and debug reporting, may get
	 * overridden through set_worker_desc().
	 */
	strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);

	list_del_init(&work->entry);

	/*
	 * CPU intensive works don't participate in concurrency management.
	 * They're the scheduler's responsibility. This takes @worker out
	 * of concurrency management and the next code block will chain
	 * execution of the pending work items.
	 */
	if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE))
		worker_set_flags(worker, WORKER_CPU_INTENSIVE);

	/*
	 * Kick @pool if necessary. It's always noop for per-cpu worker pools
	 * since nr_running would always be >= 1 at this point. This is used to
	 * chain execution of the pending work items for WORKER_NOT_RUNNING
	 * workers such as the UNBOUND and CPU_INTENSIVE ones.
	 */
	kick_pool(pool);

	/*
	 * Record the last pool and clear PENDING which should be the last
	 * update to @work. Also, do this inside @pool->lock so that
	 * PENDING and queued state changes happen together while IRQ is
	 * disabled.
	 */
	set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool));

	pwq->stats[PWQ_STAT_STARTED]++;
	raw_spin_unlock_irq(&pool->lock);

	/* remember the depths so that leaks by the work function can be detected */
	rcu_start_depth = rcu_preempt_depth();
	lockdep_start_depth = lockdep_depth(current);
	/* see drain_dead_softirq_workfn() */
	if (!bh_draining)
		lock_map_acquire(pwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	/*
	 * Strictly speaking we should mark the invariant state without holding
	 * any locks, that is, before these two lock_map_acquire()'s.
	 *
	 * However, that would result in:
	 *
	 *   A(W1)
	 *   WFC(C)
	 *		A(W1)
	 *		C(C)
	 *
	 * Which would create W1->C->W1 dependencies, even though there is no
	 * actual deadlock possible. There are two solutions, using a
	 * read-recursive acquire on the work(queue) 'locks', but this will then
	 * hit the lockdep limitation on recursive locks, or simply discard
	 * these locks.
	 *
	 * AFAICT there is no possible deadlock scenario between the
	 * flush_work() and complete() primitives (except for single-threaded
	 * workqueues), so hiding them isn't a problem.
	 */
	lockdep_invariant_state(true);
	trace_workqueue_execute_start(work);
	worker->current_func(work);
	/*
	 * While we must be careful to not use "work" after this, the trace
	 * point will only record its address.
	 */
	trace_workqueue_execute_end(work, worker->current_func);

	lock_map_release(&lockdep_map);
	if (!bh_draining)
		lock_map_release(pwq->wq->lockdep_map);

	/* complain loudly if the work function returned in a different context */
	if (unlikely((worker->task && in_atomic()) ||
		     lockdep_depth(current) != lockdep_start_depth ||
		     rcu_preempt_depth() != rcu_start_depth)) {
		pr_err("BUG: workqueue leaked atomic, lock or RCU: %s[%d]\n"
		       "     preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n",
		       current->comm, task_pid_nr(current), preempt_count(),
		       lockdep_start_depth, lockdep_depth(current),
		       rcu_start_depth, rcu_preempt_depth(),
		       worker->current_func);
		debug_show_held_locks(current);
		dump_stack();
	}

	/*
	 * The following prevents a kworker from hogging CPU on !PREEMPTION
	 * kernels, where a requeueing work item waiting for something to
	 * happen could deadlock with stop_machine as such work item could
	 * indefinitely requeue itself while all other CPUs are trapped in
	 * stop_machine. At the same time, report a quiescent RCU state so
	 * the same condition doesn't freeze RCU.
	 */
	if (worker->task)
		cond_resched();

	raw_spin_lock_irq(&pool->lock);

	pwq->stats[PWQ_STAT_COMPLETED]++;

	/*
	 * In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
	 * CPU intensive by wq_worker_tick() if @work hogged CPU longer than
	 * wq_cpu_intensive_thresh_us. Clear it.
	 */
	worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

	/* tag the worker for identification in schedule() */
	worker->last_func = worker->current_func;

	/* we're done with it, release */
	hash_del(&worker->hentry);
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
	worker->current_color = INT_MAX;

	/* must be the last step, see the function comment */
	pwq_dec_nr_in_flight(pwq, work_data);
}

/**
 * process_scheduled_works - process scheduled works
 * @worker: self
 *
 * Process all scheduled works. Please note that the scheduled list
 * may change while processing a work, so this function repeatedly
 * fetches a work from the top and executes it.
*
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.
 */
static void process_scheduled_works(struct worker *worker)
{
	struct work_struct *work;
	bool first = true;

	while ((work = list_first_entry_or_null(&worker->scheduled,
						struct work_struct, entry))) {
		/* refresh the pool's watchdog timestamp once per batch */
		if (first) {
			worker->pool->watchdog_ts = jiffies;
			first = false;
		}
		process_one_work(worker, work);
	}
}

/*
 * Set (@val == true) or clear (@val == false) PF_WQ_WORKER on the current
 * task. The update is done while holding wq_pool_attach_mutex.
 */
static void set_pf_worker(bool val)
{
	mutex_lock(&wq_pool_attach_mutex);
	if (val)
		current->flags |= PF_WQ_WORKER;
	else
		current->flags &= ~PF_WQ_WORKER;
	mutex_unlock(&wq_pool_attach_mutex);
}

/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The worker thread function. All workers belong to a worker_pool -
 * either a per-cpu one or dynamic unbound one. These workers process all
 * work items regardless of their specific target workqueue. The only
 * exception is work items which belong to workqueues with a rescuer which
 * will be explained in rescuer_thread().
 *
 * Return: 0
 */
static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct worker_pool *pool = worker->pool;

	/* tell the scheduler that this is a workqueue worker */
	set_pf_worker(true);
woke_up:
	raw_spin_lock_irq(&pool->lock);

	/* am I supposed to die? */
	if (unlikely(worker->flags & WORKER_DIE)) {
		raw_spin_unlock_irq(&pool->lock);
		set_pf_worker(false);
		/*
		 * The worker is dead and PF_WQ_WORKER is cleared, worker->pool
		 * shouldn't be accessed, reset it to NULL in case otherwise.
		 */
		worker->pool = NULL;
		/* the local @pool pointer is still valid for returning the ID */
		ida_free(&pool->worker_ida, worker->id);
		return 0;
	}

	worker_leave_idle(worker);
recheck:
	/* no more worker necessary? */
	if (!need_more_worker(pool))
		goto sleep;

	/* do we need to manage? */
	if (unlikely(!may_start_working(pool)) && manage_workers(worker))
		goto recheck;

	/*
	 * ->scheduled list can only be filled while a worker is
	 * preparing to process a work or actually processing it.
	 * Make sure nobody diddled with it while I was sleeping.
	 */
	WARN_ON_ONCE(!list_empty(&worker->scheduled));

	/*
	 * Finish PREP stage. We're guaranteed to have at least one idle
	 * worker or that someone else has already assumed the manager
	 * role. This is where @worker starts participating in concurrency
	 * management if applicable and concurrency management is restored
	 * after being rebound. See rebind_workers() for details.
	 */
	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

	do {
		struct work_struct *work =
			list_first_entry(&pool->worklist,
					 struct work_struct, entry);

		/* only run the batch if the head work was actually assigned to us */
		if (assign_work(work, worker, NULL))
			process_scheduled_works(worker);
	} while (keep_working(pool));

	worker_set_flags(worker, WORKER_PREP);
sleep:
	/*
	 * pool->lock is held and there's no work to process and no need to
	 * manage, sleep. Workers are woken up only while holding
	 * pool->lock or from local cpu, so setting the current state
	 * before releasing pool->lock is enough to prevent losing any
	 * event.
	 */
	worker_enter_idle(worker);
	__set_current_state(TASK_IDLE);
	raw_spin_unlock_irq(&pool->lock);
	schedule();
	goto woke_up;
}

/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueue rescuer thread function. There's one rescuer for each
 * workqueue which has WQ_MEM_RECLAIM set.
 *
 * Regular work processing on a pool may block trying to create a new
 * worker which uses GFP_KERNEL allocation which has slight chance of
 * developing into deadlock if some works currently on the same queue
 * need to be processed to satisfy the GFP_KERNEL allocation. This is
 * the problem rescuer solves.
 *
 * When such condition is possible, the pool summons rescuers of all
 * workqueues which have works queued on the pool and let them process
 * those works so that forward progress can be guaranteed.
 *
 * This should happen rarely.
*
 * Return: 0
 */
static int rescuer_thread(void *__rescuer)
{
	struct worker *rescuer = __rescuer;
	struct workqueue_struct *wq = rescuer->rescue_wq;
	bool should_stop;

	set_user_nice(current, RESCUER_NICE_LEVEL);

	/*
	 * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
	 * doesn't participate in concurrency management.
	 */
	set_pf_worker(true);
repeat:
	set_current_state(TASK_IDLE);

	/*
	 * By the time the rescuer is requested to stop, the workqueue
	 * shouldn't have any work pending, but @wq->maydays may still have
	 * pwq(s) queued. This can happen by non-rescuer workers consuming
	 * all the work items before the rescuer got to them. Go through
	 * @wq->maydays processing before acting on should_stop so that the
	 * list is always empty on exit.
	 */
	should_stop = kthread_should_stop();

	/* see whether any pwq is asking for help */
	raw_spin_lock_irq(&wq_mayday_lock);

	while (!list_empty(&wq->maydays)) {
		struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
					struct pool_workqueue, mayday_node);
		struct worker_pool *pool = pwq->pool;
		struct work_struct *work, *n;

		__set_current_state(TASK_RUNNING);
		list_del_init(&pwq->mayday_node);

		/* wq_mayday_lock is dropped while the pool is being served */
		raw_spin_unlock_irq(&wq_mayday_lock);

		worker_attach_to_pool(rescuer, pool);

		raw_spin_lock_irq(&pool->lock);

		/*
		 * Slurp in all works issued via this workqueue and
		 * process'em.
		 */
		WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
			if (get_work_pwq(work) == pwq &&
			    assign_work(work, rescuer, &n))
				pwq->stats[PWQ_STAT_RESCUED]++;
		}

		if (!list_empty(&rescuer->scheduled)) {
			process_scheduled_works(rescuer);

			/*
			 * The above execution of rescued work items could
			 * have created more to rescue through
			 * pwq_activate_first_inactive() or chained
			 * queueing. Let's put @pwq back on mayday list so
			 * that such back-to-back work items, which may be
			 * being used to relieve memory pressure, don't
			 * incur MAYDAY_INTERVAL delay in between.
			 */
			if (pwq->nr_active && need_to_create_worker(pool)) {
				raw_spin_lock(&wq_mayday_lock);
				/*
				 * Queue iff we aren't racing destruction
				 * and somebody else hasn't queued it already.
				 */
				if (wq->rescuer && list_empty(&pwq->mayday_node)) {
					get_pwq(pwq);
					list_add_tail(&pwq->mayday_node, &wq->maydays);
				}
				raw_spin_unlock(&wq_mayday_lock);
			}
		}

		/*
		 * Leave this pool. Notify regular workers; otherwise, we end up
		 * with 0 concurrency and stalling the execution.
		 */
		kick_pool(pool);

		raw_spin_unlock_irq(&pool->lock);

		worker_detach_from_pool(rescuer);

		/*
		 * Put the reference grabbed by send_mayday(). @pool might
		 * go away any time after it.
		 */
		put_pwq_unlocked(pwq);

		/* retake the lock before re-testing wq->maydays */
		raw_spin_lock_irq(&wq_mayday_lock);
	}

	raw_spin_unlock_irq(&wq_mayday_lock);

	if (should_stop) {
		__set_current_state(TASK_RUNNING);
		set_pf_worker(false);
		return 0;
	}

	/* rescuers should never participate in concurrency management */
	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
	schedule();
	goto repeat;
}

/*
 * Run pending work items of @worker's pool on behalf of a BH pool. One pass
 * stops when keep_working() turns false, after BH_WORKER_RESTARTS rounds, or
 * once BH_WORKER_JIFFIES have elapsed, whichever comes first. The pool is
 * kicked before returning.
 */
static void bh_worker(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;
	int nr_restarts = BH_WORKER_RESTARTS;
	unsigned long end = jiffies + BH_WORKER_JIFFIES;

	raw_spin_lock_irq(&pool->lock);
	worker_leave_idle(worker);

	/*
	 * This function follows the structure of worker_thread(). See there for
	 * explanations on each step.
	 */
	if (!need_more_worker(pool))
		goto done;

	WARN_ON_ONCE(!list_empty(&worker->scheduled));
	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

	do {
		struct work_struct *work =
			list_first_entry(&pool->worklist,
					 struct work_struct, entry);

		if (assign_work(work, worker, NULL))
			process_scheduled_works(worker);
	} while (keep_working(pool) &&
		 --nr_restarts && time_before(jiffies, end));

	worker_set_flags(worker, WORKER_PREP);
done:
	worker_enter_idle(worker);
	kick_pool(pool);
	raw_spin_unlock_irq(&pool->lock);
}

/*
 * TODO: Convert all tasklet users to workqueue and use softirq directly.
* * This is currently called from tasklet[_hi]action() and thus is also called * whenever there are tasklets to run. Let's do an early exit if there's nothing * queued. Once conversion from tasklet is complete, the need_more_worker() test * can be dropped. * * After full conversion, we'll add worker->softirq_action, directly use the * softirq action and obtain the worker pointer from the softirq_action pointer. */ void workqueue_softirq_action(bool highpri) { struct worker_pool *pool = &per_cpu(bh_worker_pools, smp_processor_id())[highpri]; if (need_more_worker(pool)) bh_worker(list_first_entry(&pool->workers, struct worker, node)); } struct wq_drain_dead_softirq_work { struct work_struct work; struct worker_pool *pool; struct completion done; }; static void drain_dead_softirq_workfn(struct work_struct *work) { struct wq_drain_dead_softirq_work *dead_work = container_of(work, struct wq_drain_dead_softirq_work, work); struct worker_pool *pool = dead_work->pool; bool repeat; /* * @pool's CPU is dead and we want to execute its still pending work * items from this BH work item which is running on a different CPU. As * its CPU is dead, @pool can't be kicked and, as work execution path * will be nested, a lockdep annotation needs to be suppressed. Mark * @pool with %POOL_BH_DRAINING for the special treatments. */ raw_spin_lock_irq(&pool->lock); pool->flags |= POOL_BH_DRAINING; raw_spin_unlock_irq(&pool->lock); bh_worker(list_first_entry(&pool->workers, struct worker, node)); raw_spin_lock_irq(&pool->lock); pool->flags &= ~POOL_BH_DRAINING; repeat = need_more_worker(pool); raw_spin_unlock_irq(&pool->lock); /* * bh_worker() might hit consecutive execution limit and bail. If there * still are pending work items, reschedule self and return so that we * don't hog this CPU's BH. */ if (repeat) { if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) queue_work(system_bh_highpri_wq, work); else queue_work(system_bh_wq, work); } else { complete(&dead_work->done); } } /* * @cpu is dead. 
Drain the remaining BH work items on the current CPU. It's
 * possible to allocate dead_work per CPU and avoid flushing. However, then we
 * have to worry about draining overlapping with CPU coming back online or
 * nesting (one CPU's dead_work queued on another CPU which is also dead and so
 * on). Let's keep it simple and drain them synchronously. These are BH work
 * items which shouldn't be requeued on the same pool. Shouldn't take long.
 */
void workqueue_softirq_dead(unsigned int cpu)
{
	int i;

	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
		struct worker_pool *pool = &per_cpu(bh_worker_pools, cpu)[i];
		struct wq_drain_dead_softirq_work dead_work;

		/* nothing pending on this pool */
		if (!need_more_worker(pool))
			continue;

		INIT_WORK_ONSTACK(&dead_work.work, drain_dead_softirq_workfn);
		dead_work.pool = pool;
		init_completion(&dead_work.done);

		/* queue on the system BH workqueue matching the pool's nice level */
		if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
			queue_work(system_bh_highpri_wq, &dead_work.work);
		else
			queue_work(system_bh_wq, &dead_work.work);

		/* dead_work lives on this stack frame, so wait before reusing it */
		wait_for_completion(&dead_work.done);
		destroy_work_on_stack(&dead_work.work);
	}
}

/**
 * check_flush_dependency - check for flush dependency sanity
 * @target_wq: workqueue being flushed
 * @target_work: work item being flushed (NULL for workqueue flushes)
 * @from_cancel: are we called from the work cancel path
 *
 * %current is trying to flush the whole @target_wq or @target_work on it.
 * If this is not the cancel path (which implies work being flushed is either
 * already running, or will not be at all), check if @target_wq doesn't have
 * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running
 * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
 * progress guarantee leading to a deadlock.
 */
static void check_flush_dependency(struct workqueue_struct *target_wq,
				   struct work_struct *target_work,
				   bool from_cancel)
{
	work_func_t target_func;
	struct worker *worker;

	/* cancel path and WQ_MEM_RECLAIM targets are exempt from the checks */
	if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM)
		return;

	worker = current_wq_worker();
	/* only used for the warning messages below */
	target_func = target_work ? target_work->func : NULL;

	WARN_ONCE(current->flags & PF_MEMALLOC,
		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
		  current->pid, current->comm, target_wq->name, target_func);
	/* warn when the flusher's own wq has WQ_MEM_RECLAIM set without __WQ_LEGACY */
	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
		  "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
		  worker->current_pwq->wq->name, worker->current_func,
		  target_wq->name, target_func);
}

/*
 * Barrier work item: @work runs wq_barrier_func(), which completes @done.
 * Initialized by insert_wq_barrier().
 */
struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
	struct task_struct	*task;	/* purely informational */
};

/* Work function of a barrier: signal whoever is waiting on barr->done. */
static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

/**
 * insert_wq_barrier - insert a barrier work
 * @pwq: pwq to insert barrier into
 * @barr: wq_barrier to insert
 * @target: target work to attach @barr to
 * @worker: worker currently executing @target, NULL if @target is not executing
 *
 * @barr is linked to @target such that @barr is completed only after
 * @target finishes execution. Please note that the ordering
 * guarantee is observed only with respect to @target and on the local
 * cpu.
 *
 * Currently, a queued barrier can't be canceled. This is because
 * try_to_grab_pending() can't determine whether the work to be
 * grabbed is at the head of the queue and thus can't clear LINKED
 * flag of the previous work while there must be a valid next work
 * after a work with LINKED flag set.
 *
 * Note that when @worker is non-NULL, @target may be modified
 * underneath us, so we can't reliably determine pwq from @target.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
*/
static void insert_wq_barrier(struct pool_workqueue *pwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	static __maybe_unused struct lock_class_key bh_key, thr_key;
	unsigned int work_flags = 0;
	unsigned int work_color;
	struct list_head *head;

	/*
	 * debugobject calls are safe here even with pool->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * might deadlock.
	 *
	 * BH and threaded workqueues need separate lockdep keys to avoid
	 * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W}
	 * usage".
	 */
	INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func,
			      (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key);
	/* mark the barrier PENDING by hand; it is inserted directly below */
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));

	init_completion_map(&barr->done, &target->lockdep_map);

	barr->task = current;

	/* The barrier work item does not participate in nr_active. */
	work_flags |= WORK_STRUCT_INACTIVE;

	/*
	 * If @target is currently being executed, schedule the
	 * barrier to the worker; otherwise, put it after @target.
	 */
	if (worker) {
		head = worker->scheduled.next;
		work_color = worker->current_color;
	} else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;
		/* there can already be other linked works, inherit and set */
		work_flags |= *bits & WORK_STRUCT_LINKED;
		work_color = get_work_color(*bits);
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	/* account the barrier under the color picked above */
	pwq->nr_in_flight[work_color]++;
	work_flags |= work_color_to_flags(work_color);

	insert_work(pwq, &barr->work, head, work_flags);
}

/**
 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
 * @wq: workqueue being flushed
 * @flush_color: new flush color, < 0 for no-op
 * @work_color: new work color, < 0 for no-op
 *
 * Prepare pwqs for workqueue flushing.
 *
 * If @flush_color is non-negative, flush_color on all pwqs should be
 * -1. If no pwq has in-flight commands at the specified color, all
 * pwq->flush_color's stay at -1 and %false is returned.
If any pwq * has in flight commands, its pwq->flush_color is set to * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq * wakeup logic is armed and %true is returned. * * The caller should have initialized @wq->first_flusher prior to * calling this function with non-negative @flush_color. If * @flush_color is negative, no flush color update is done and %false * is returned. * * If @work_color is non-negative, all pwqs should have the same * work_color which is previous to @work_color and all will be * advanced to @work_color. * * CONTEXT: * mutex_lock(wq->mutex). * * Return: * %true if @flush_color >= 0 and there's something to flush. %false * otherwise. */ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, int flush_color, int work_color) { bool wait = false; struct pool_workqueue *pwq; struct worker_pool *current_pool = NULL; if (flush_color >= 0) { WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); atomic_set(&wq->nr_pwqs_to_flush, 1); } /* * For unbound workqueue, pwqs will map to only a few pools. * Most of the time, pwqs within the same pool will be linked * sequentially to wq->pwqs by cpu index. So in the majority * of pwq iters, the pool is the same, only doing lock/unlock * if the pool has changed. This can largely reduce expensive * lock operations. 
*/ for_each_pwq(pwq, wq) { if (current_pool != pwq->pool) { if (likely(current_pool)) raw_spin_unlock_irq(&current_pool->lock); current_pool = pwq->pool; raw_spin_lock_irq(&current_pool->lock); } if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); if (pwq->nr_in_flight[flush_color]) { pwq->flush_color = flush_color; atomic_inc(&wq->nr_pwqs_to_flush); wait = true; } } if (work_color >= 0) { WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); pwq->work_color = work_color; } } if (current_pool) raw_spin_unlock_irq(&current_pool->lock); if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); return wait; } static void touch_wq_lockdep_map(struct workqueue_struct *wq) { #ifdef CONFIG_LOCKDEP if (unlikely(!wq->lockdep_map)) return; if (wq->flags & WQ_BH) local_bh_disable(); lock_map_acquire(wq->lockdep_map); lock_map_release(wq->lockdep_map); if (wq->flags & WQ_BH) local_bh_enable(); #endif } static void touch_work_lockdep_map(struct work_struct *work, struct workqueue_struct *wq) { #ifdef CONFIG_LOCKDEP if (wq->flags & WQ_BH) local_bh_disable(); lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); if (wq->flags & WQ_BH) local_bh_enable(); #endif } /** * __flush_workqueue - ensure that any scheduled work has run to completion. * @wq: workqueue to flush * * This function sleeps until all work items which were queued on entry * have finished execution, but it is not livelocked by new incoming ones. */ void __flush_workqueue(struct workqueue_struct *wq) { struct wq_flusher this_flusher = { .list = LIST_HEAD_INIT(this_flusher.list), .flush_color = -1, .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)), }; int next_color; if (WARN_ON(!wq_online)) return; touch_wq_lockdep_map(wq); mutex_lock(&wq->mutex); /* * Start-to-wait phase */ next_color = work_next_color(wq->work_color); if (next_color != wq->flush_color) { /* * Color space is not full. 
The current work_color * becomes our flush_color and work_color is advanced * by one. */ WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); this_flusher.flush_color = wq->work_color; wq->work_color = next_color; if (!wq->first_flusher) { /* no flush in progress, become the first flusher */ WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); wq->first_flusher = &this_flusher; if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, wq->work_color)) { /* nothing to flush, done */ wq->flush_color = next_color; wq->first_flusher = NULL; goto out_unlock; } } else { /* wait in queue */ WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); list_add_tail(&this_flusher.list, &wq->flusher_queue); flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } } else { /* * Oops, color space is full, wait on overflow queue. * The next flush completion will assign us * flush_color and transfer to flusher_queue. */ list_add_tail(&this_flusher.list, &wq->flusher_overflow); } check_flush_dependency(wq, NULL, false); mutex_unlock(&wq->mutex); wait_for_completion(&this_flusher.done); /* * Wake-up-and-cascade phase * * First flushers are responsible for cascading flushes and * handling overflow. Non-first flushers can simply return. 
*/ if (READ_ONCE(wq->first_flusher) != &this_flusher) return; mutex_lock(&wq->mutex); /* we might have raced, check again with mutex held */ if (wq->first_flusher != &this_flusher) goto out_unlock; WRITE_ONCE(wq->first_flusher, NULL); WARN_ON_ONCE(!list_empty(&this_flusher.list)); WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); while (true) { struct wq_flusher *next, *tmp; /* complete all the flushers sharing the current flush color */ list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { if (next->flush_color != wq->flush_color) break; list_del_init(&next->list); complete(&next->done); } WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && wq->flush_color != work_next_color(wq->work_color)); /* this flush_color is finished, advance by one */ wq->flush_color = work_next_color(wq->flush_color); /* one color has been freed, handle overflow queue */ if (!list_empty(&wq->flusher_overflow)) { /* * Assign the same color to all overflowed * flushers, advance work_color and append to * flusher_queue. This is the start-to-wait * phase for these overflowed flushers. */ list_for_each_entry(tmp, &wq->flusher_overflow, list) tmp->flush_color = wq->work_color; wq->work_color = work_next_color(wq->work_color); list_splice_tail_init(&wq->flusher_overflow, &wq->flusher_queue); flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } if (list_empty(&wq->flusher_queue)) { WARN_ON_ONCE(wq->flush_color != wq->work_color); break; } /* * Need to flush more colors. Make the next flusher * the new first flusher and arm pwqs. */ WARN_ON_ONCE(wq->flush_color == wq->work_color); WARN_ON_ONCE(wq->flush_color != next->flush_color); list_del_init(&next->list); wq->first_flusher = next; if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) break; /* * Meh... this color is already done, clear first * flusher and repeat cascading. 
*/
		wq->first_flusher = NULL;
	}

out_unlock:
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL(__flush_workqueue);

/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty. While draining is in progress,
 * only chain queueing is allowed. IOW, only currently pending or running
 * work items on @wq can queue further work items on it. @wq is flushed
 * repeatedly until it becomes empty. The number of flushes is determined
 * by the depth of chaining and should be relatively small. Whine if it
 * takes too long.
 */
void drain_workqueue(struct workqueue_struct *wq)
{
	unsigned int flush_cnt = 0;
	struct pool_workqueue *pwq;

	/*
	 * __queue_work() needs to test whether there are drainers, is much
	 * hotter than drain_workqueue() and already looks at @wq->flags.
	 * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers.
	 */
	mutex_lock(&wq->mutex);
	/* only the first drainer sets the flag */
	if (!wq->nr_drainers++)
		wq->flags |= __WQ_DRAINING;
	mutex_unlock(&wq->mutex);
reflush:
	__flush_workqueue(wq);

	mutex_lock(&wq->mutex);

	for_each_pwq(pwq, wq) {
		bool drained;

		raw_spin_lock_irq(&pwq->pool->lock);
		drained = pwq_is_empty(pwq);
		raw_spin_unlock_irq(&pwq->pool->lock);

		if (drained)
			continue;

		/* whine on the 10th try and on every 100th try up to 1000 */
		if (++flush_cnt == 10 ||
		    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
			pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
				wq->name, __func__, flush_cnt);

		/* a pwq is still not empty, drop the mutex and flush again */
		mutex_unlock(&wq->mutex);
		goto reflush;
	}

	/* the last drainer clears the flag */
	if (!--wq->nr_drainers)
		wq->flags &= ~__WQ_DRAINING;
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(drain_workqueue);

/*
 * Queue @barr for @work through insert_wq_barrier() so that the caller can
 * wait on @barr->done.
 *
 * Returns %true if the barrier was inserted. Returns %false without
 * inserting anything if @work has no pool, if @work's pwq belongs to a
 * different pool, or if @work has no pwq and no worker of the pool is
 * currently executing it.
 */
static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
			     bool from_cancel)
{
	struct worker *worker = NULL;
	struct worker_pool *pool;
	struct pool_workqueue *pwq;
	struct workqueue_struct *wq;

	rcu_read_lock();
	pool = get_work_pool(work);
	if (!pool) {
		rcu_read_unlock();
		return false;
	}

	raw_spin_lock_irq(&pool->lock);
	/* see the comment in try_to_grab_pending() with the same code */
	pwq = get_work_pwq(work);
	if (pwq) {
		if (unlikely(pwq->pool != pool))
			goto already_gone;
	} else {
		/* no pwq recorded in @work, look for a worker running it */
		worker = find_worker_executing_work(pool, work);
		if (!worker)
			goto already_gone;
		pwq = worker->current_pwq;
	}

	wq = pwq->wq;
	check_flush_dependency(wq, work, from_cancel);

	insert_wq_barrier(pwq, barr, work, worker);
	raw_spin_unlock_irq(&pool->lock);

	touch_work_lockdep_map(work, wq);

	/*
	 * Force a lock recursion deadlock when using flush_work() inside a
	 * single-threaded or rescuer equipped workqueue.
	 *
	 * For single threaded workqueues the deadlock happens when the work
	 * is after the work issuing the flush_work(). For rescuer equipped
	 * workqueues the deadlock happens when the rescuer stalls, blocking
	 * forward progress.
	 */
	if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer))
		touch_wq_lockdep_map(wq);

	rcu_read_unlock();
	return true;
already_gone:
	raw_spin_unlock_irq(&pool->lock);
	rcu_read_unlock();
	return false;
}

/*
 * Wait for @work through an on-stack barrier.
 *
 * Returns %false without waiting if workqueues are not online yet or
 * @work->func is NULL (both trigger a warning), or if start_flush_work()
 * could not insert the barrier. Returns %true after the barrier has
 * completed.
 */
static bool __flush_work(struct work_struct *work, bool from_cancel)
{
	struct wq_barrier barr;

	if (WARN_ON(!wq_online))
		return false;

	if (WARN_ON(!work->func))
		return false;

	if (!start_flush_work(work, &barr, from_cancel))
		return false;

	/*
	 * start_flush_work() returned %true. If @from_cancel is set, we know
	 * that @work must have been executing during start_flush_work() and
	 * can't currently be queued. Its data must contain OFFQ bits. If @work
	 * was queued on a BH workqueue, we also know that it was running in the
	 * BH context and thus can be busy-waited.
	 */
	if (from_cancel) {
		unsigned long data = *work_data_bits(work);

		if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
		    (data & WORK_OFFQ_BH)) {
			/*
			 * On RT, prevent a live lock when %current preempted
			 * soft interrupt processing or prevents ksoftirqd from
			 * running by keeping flipping BH. If the BH work item
			 * runs on a different CPU then this has no effect other
			 * than doing the BH disable/enable dance for nothing.
			 * This is copied from
			 * kernel/softirq.c::tasklet_unlock_spin_wait().
			 */
			while (!try_wait_for_completion(&barr.done)) {
				if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
					local_bh_disable();
					local_bh_enable();
				} else {
					cpu_relax();
				}
			}
			goto out_destroy;
		}
	}

	/* sleeping wait for everything which didn't busy-wait above */
	wait_for_completion(&barr.done);

out_destroy:
	destroy_work_on_stack(&barr.work);
	return true;
}

/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution. @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_work(struct work_struct *work)
{
	might_sleep();
	return __flush_work(work, false);
}
EXPORT_SYMBOL_GPL(flush_work);

/**
 * flush_delayed_work - wait for a dwork to finish executing the last queueing
 * @dwork: the delayed work to flush
 *
 * Delayed timer is cancelled and the pending work is queued for
 * immediate execution. Like flush_work(), this function only
 * considers the last queueing instance of @dwork.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_delayed_work(struct delayed_work *dwork)
{
	local_irq_disable();
	/* queue the work right away only if the timer had to be deleted */
	if (timer_delete_sync(&dwork->timer))
		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
	local_irq_enable();
	return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

/**
 * flush_rcu_work - wait for a rwork to finish executing the last queueing
 * @rwork: the rcu work to flush
 *
 * Return:
 * %true if flush_rcu_work() waited for the work to finish execution,
 * %false if it was already idle.
*/
bool flush_rcu_work(struct rcu_work *rwork)
{
	if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
		/*
		 * PENDING is set: wait in rcu_barrier() before flushing and
		 * report that we waited regardless of flush_work()'s result.
		 */
		rcu_barrier();
		flush_work(&rwork->work);
		return true;
	} else {
		return flush_work(&rwork->work);
	}
}
EXPORT_SYMBOL(flush_rcu_work);

/*
 * Increment the disable count in @offqd. The count saturates at
 * (1 << WORK_OFFQ_DISABLE_BITS) - 1; an attempt to go beyond that leaves the
 * count unchanged and warns once.
 */
static void work_offqd_disable(struct work_offq_data *offqd)
{
	const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1;

	if (likely(offqd->disable < max))
		offqd->disable++;
	else
		WARN_ONCE(true, "workqueue: work disable count overflowed\n");
}

/*
 * Decrement the disable count in @offqd. A count which is already zero is
 * left unchanged and warned about once.
 */
static void work_offqd_enable(struct work_offq_data *offqd)
{
	if (likely(offqd->disable > 0))
		offqd->disable--;
	else
		WARN_ONCE(true, "workqueue: work disable count underflowed\n");
}

/*
 * Common implementation of the non-waiting cancel and disable operations.
 *
 * Grabs @work through work_grab_pending(), bumps the disable count if
 * %WORK_CANCEL_DISABLE is set in @cflags, then writes the pool id and the
 * repacked off-queue flags back while clearing PENDING. The return value is
 * the result of work_grab_pending().
 */
static bool __cancel_work(struct work_struct *work, u32 cflags)
{
	struct work_offq_data offqd;
	unsigned long irq_flags;
	int ret;

	ret = work_grab_pending(work, cflags, &irq_flags);

	work_offqd_unpack(&offqd, *work_data_bits(work));

	if (cflags & WORK_CANCEL_DISABLE)
		work_offqd_disable(&offqd);

	set_work_pool_and_clear_pending(work, offqd.pool_id,
					work_offqd_pack_flags(&offqd));
	/* restores the IRQ state saved by work_grab_pending() */
	local_irq_restore(irq_flags);
	return ret;
}

/*
 * Common implementation of the waiting cancel and disable operations.
 *
 * @work is always cancelled with %WORK_CANCEL_DISABLE set. If the caller did
 * not ask for it in @cflags, the disable is undone with enable_work() once
 * the flush has finished. Returns the result of __cancel_work().
 */
static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
{
	bool ret;

	ret = __cancel_work(work, cflags | WORK_CANCEL_DISABLE);

	/* BH work items must not be in hardirq; the rest may sleep */
	if (*work_data_bits(work) & WORK_OFFQ_BH)
		WARN_ON_ONCE(in_hardirq());
	else
		might_sleep();

	/*
	 * Skip __flush_work() during early boot when we know that @work isn't
	 * executing. This allows canceling during early boot.
	 */
	if (wq_online)
		__flush_work(work, true);

	/* drop the disable taken above unless the caller wanted it */
	if (!(cflags & WORK_CANCEL_DISABLE))
		enable_work(work);

	return ret;
}

/*
 * See cancel_delayed_work()
 */
bool cancel_work(struct work_struct *work)
{
	return __cancel_work(work, 0);
}
EXPORT_SYMBOL(cancel_work);

/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish. This function can be used
 * even if the work re-queues itself or migrates to another workqueue.
On return
 * from this function, @work is guaranteed to be not pending or executing on any
 * CPU as long as there aren't racing enqueues.
 *
 * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
 * Use cancel_delayed_work_sync() instead.
 *
 * Must be called from a sleepable context if @work was last queued on a non-BH
 * workqueue. Can also be called from non-hardirq atomic contexts including BH
 * if @work was last queued on a BH workqueue.
 *
 * Returns %true if @work was pending, %false otherwise.
 */
bool cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_sync(work, 0);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.
 *
 * Return: %true if @dwork was pending and canceled; %false if it wasn't
 * pending.
 *
 * Note:
 * The work callback function may still be running on return, unless
 * it returns %true and the work doesn't re-arm itself. Explicitly flush or
 * use cancel_delayed_work_sync() to wait on it.
 *
 * This function is safe to call from any context including IRQ handler.
 */
bool cancel_delayed_work(struct delayed_work *dwork)
{
	return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED);
}
EXPORT_SYMBOL(cancel_delayed_work);

/**
 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
 * @dwork: the delayed work to cancel
 *
 * This is cancel_work_sync() for delayed works.
 *
 * Return:
 * %true if @dwork was pending, %false otherwise.
 */
bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);

/**
 * disable_work - Disable and cancel a work item
 * @work: work item to disable
 *
 * Disable @work by incrementing its disable count and cancel it if currently
 * pending. As long as the disable count is non-zero, any attempt to queue @work
 * will fail and return %false.
The maximum supported disable depth is 2 to the
 * power of %WORK_OFFQ_DISABLE_BITS, currently 65536.
 *
 * Can be called from any context. Returns %true if @work was pending, %false
 * otherwise.
 */
bool disable_work(struct work_struct *work)
{
	return __cancel_work(work, WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_work);

/**
 * disable_work_sync - Disable, cancel and drain a work item
 * @work: work item to disable
 *
 * Similar to disable_work() but also wait for @work to finish if currently
 * executing.
 *
 * Must be called from a sleepable context if @work was last queued on a non-BH
 * workqueue. Can also be called from non-hardirq atomic contexts including BH
 * if @work was last queued on a BH workqueue.
 *
 * Returns %true if @work was pending, %false otherwise.
 */
bool disable_work_sync(struct work_struct *work)
{
	return __cancel_work_sync(work, WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_work_sync);

/**
 * enable_work - Enable a work item
 * @work: work item to enable
 *
 * Undo disable_work[_sync]() by decrementing @work's disable count. @work can
 * only be queued if its disable count is 0.
 *
 * Can be called from any context. Returns %true if the disable count reached 0.
 * Otherwise, %false.
 */
bool enable_work(struct work_struct *work)
{
	struct work_offq_data offqd;
	unsigned long irq_flags;

	/* the return value is not needed here, only the grab itself */
	work_grab_pending(work, 0, &irq_flags);

	/* decrement the count and write the repacked data back */
	work_offqd_unpack(&offqd, *work_data_bits(work));
	work_offqd_enable(&offqd);
	set_work_pool_and_clear_pending(work, offqd.pool_id,
					work_offqd_pack_flags(&offqd));
	local_irq_restore(irq_flags);

	/* %true only when the count has dropped to zero */
	return !offqd.disable;
}
EXPORT_SYMBOL_GPL(enable_work);

/**
 * disable_delayed_work - Disable and cancel a delayed work item
 * @dwork: delayed work item to disable
 *
 * disable_work() for delayed work items.
*/
bool disable_delayed_work(struct delayed_work *dwork)
{
	return __cancel_work(&dwork->work,
			     WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_delayed_work);

/**
 * disable_delayed_work_sync - Disable, cancel and drain a delayed work item
 * @dwork: delayed work item to disable
 *
 * disable_work_sync() for delayed work items.
 */
bool disable_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_sync(&dwork->work,
				  WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE);
}
EXPORT_SYMBOL_GPL(disable_delayed_work_sync);

/**
 * enable_delayed_work - Enable a delayed work item
 * @dwork: delayed work item to enable
 *
 * enable_work() for delayed work items.
 */
bool enable_delayed_work(struct delayed_work *dwork)
{
	return enable_work(&dwork->work);
}
EXPORT_SYMBOL_GPL(enable_delayed_work);

/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * schedule_on_each_cpu() executes @func on each online CPU using the
 * system workqueue and blocks until all CPUs have completed.
 * schedule_on_each_cpu() is very slow.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct __percpu *works;

	/* one work item per CPU */
	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	/* held across both loops below, which walk the online CPUs */
	cpus_read_lock();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	/* wait for every work item scheduled above */
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	cpus_read_unlock();
	free_percpu(works);
	return 0;
}

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn: the function to execute
 * @ew: guaranteed storage for the execute work structure (must
 *	be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
*
 * Return:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	/* not in interrupt context, call @fn directly */
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	/* otherwise hand @fn over to schedule_work() using @ew's storage */
	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);

/**
 * free_workqueue_attrs - free a workqueue_attrs
 * @attrs: workqueue_attrs to free
 *
 * Undo alloc_workqueue_attrs(). A %NULL @attrs is ignored.
 */
void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
	if (attrs) {
		free_cpumask_var(attrs->cpumask);
		free_cpumask_var(attrs->__pod_cpumask);
		kfree(attrs);
	}
}

/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 *
 * Allocate a new workqueue_attrs, initialize with default settings and
 * return it.
 *
 * Return: The allocated new workqueue_attrs on success. %NULL on failure.
 */
struct workqueue_attrs *alloc_workqueue_attrs_noprof(void)
{
	struct workqueue_attrs *attrs;

	attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		goto fail;
	if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
		goto fail;
	if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL))
		goto fail;

	/* defaults: all possible CPUs, default affinity scope */
	cpumask_copy(attrs->cpumask, cpu_possible_mask);
	attrs->affn_scope = WQ_AFFN_DFL;
	return attrs;
fail:
	/* handles both %NULL and partially set up @attrs */
	free_workqueue_attrs(attrs);
	return NULL;
}

/* copy every field of @from, including both cpumasks, into @to */
static void copy_workqueue_attrs(struct workqueue_attrs *to,
				 const struct workqueue_attrs *from)
{
	to->nice = from->nice;
	cpumask_copy(to->cpumask, from->cpumask);
	cpumask_copy(to->__pod_cpumask, from->__pod_cpumask);
	to->affn_strict = from->affn_strict;

	/*
	 * Unlike hash and equality test, copying shouldn't ignore wq-only
	 * fields as copying is used for both pool and wq attrs. Instead,
	 * get_unbound_pool() explicitly clears the fields.
	 */
	to->affn_scope = from->affn_scope;
	to->ordered = from->ordered;
}

/*
 * Some attrs fields are workqueue-only. Clear them for worker_pool's. See the
 * comments in 'struct workqueue_attrs' definition.
*/
static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs)
{
	attrs->affn_scope = WQ_AFFN_NR_TYPES;
	attrs->ordered = false;
	/* with strict affinity, ->cpumask is reset to all possible CPUs */
	if (attrs->affn_strict)
		cpumask_copy(attrs->cpumask, cpu_possible_mask);
}

/*
 * hash value of the content of @attrs
 *
 * Covers the same fields which wqattrs_equal() compares: ->nice,
 * ->affn_strict, ->__pod_cpumask and, only when ->affn_strict is clear,
 * ->cpumask.
 */
static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
{
	u32 hash = 0;

	hash = jhash_1word(attrs->nice, hash);
	hash = jhash_1word(attrs->affn_strict, hash);
	hash = jhash(cpumask_bits(attrs->__pod_cpumask),
		     BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
	if (!attrs->affn_strict)
		hash = jhash(cpumask_bits(attrs->cpumask),
			     BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
	return hash;
}

/*
 * content equality test
 *
 * ->affn_scope and ->ordered are not compared. ->cpumask is only compared
 * when ->affn_strict is clear.
 */
static bool wqattrs_equal(const struct workqueue_attrs *a,
			  const struct workqueue_attrs *b)
{
	if (a->nice != b->nice)
		return false;
	if (a->affn_strict != b->affn_strict)
		return false;
	if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask))
		return false;
	if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask))
		return false;
	return true;
}

/* Update @attrs with actually available CPUs */
static void wqattrs_actualize_cpumask(struct workqueue_attrs *attrs,
				      const cpumask_t *unbound_cpumask)
{
	/*
	 * Calculate the effective CPU mask of @attrs given @unbound_cpumask. If
	 * @attrs->cpumask doesn't overlap with @unbound_cpumask, we fallback to
	 * @unbound_cpumask.
	 */
	cpumask_and(attrs->cpumask, attrs->cpumask, unbound_cpumask);
	if (unlikely(cpumask_empty(attrs->cpumask)))
		cpumask_copy(attrs->cpumask, unbound_cpumask);
}

/* find wq_pod_type to use for @attrs */
static const struct wq_pod_type *
wqattrs_pod_type(const struct workqueue_attrs *attrs)
{
	enum wq_affn_scope scope;
	struct wq_pod_type *pt;

	/* to synchronize access to wq_affn_dfl */
	lockdep_assert_held(&wq_pool_mutex);

	/* %WQ_AFFN_DFL means "use whatever wq_affn_dfl currently is" */
	if (attrs->affn_scope == WQ_AFFN_DFL)
		scope = wq_affn_dfl;
	else
		scope = attrs->affn_scope;

	pt = &wq_pod_types[scope];

	/* use the selected type only if it is valid and has pods set up */
	if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) &&
	    likely(pt->nr_pods))
		return pt;

	/*
	 * Before workqueue_init_topology(), only SYSTEM is available which is
	 * initialized in workqueue_init_early().
	 */
	pt = &wq_pod_types[WQ_AFFN_SYSTEM];
	BUG_ON(!pt->nr_pods);
	return pt;
}

/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure. Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
*/
static int init_worker_pool(struct worker_pool *pool)
{
	raw_spin_lock_init(&pool->lock);
	pool->id = -1;
	pool->cpu = -1;
	pool->node = NUMA_NO_NODE;
	pool->flags |= POOL_DISASSOCIATED;
	pool->watchdog_ts = jiffies;
	INIT_LIST_HEAD(&pool->worklist);
	INIT_LIST_HEAD(&pool->idle_list);
	hash_init(pool->busy_hash);

	timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
	INIT_WORK(&pool->idle_cull_work, idle_cull_fn);

	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);

	INIT_LIST_HEAD(&pool->workers);

	ida_init(&pool->worker_ida);
	INIT_HLIST_NODE(&pool->hash_node);
	pool->refcnt = 1;

	/* shouldn't fail above this point */
	pool->attrs = alloc_workqueue_attrs();
	if (!pool->attrs)
		return -ENOMEM;

	wqattrs_clear_for_pool(pool->attrs);

	return 0;
}

#ifdef CONFIG_LOCKDEP
/*
 * Register @wq's lockdep key and initialize its lockdep map. The map name is
 * "(wq_completion)" followed by @wq->name; if that string can't be
 * allocated, @wq->name itself is used.
 */
static void wq_init_lockdep(struct workqueue_struct *wq)
{
	char *lock_name;

	lockdep_register_key(&wq->key);
	lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
	if (!lock_name)
		lock_name = wq->name;

	wq->lock_name = lock_name;
	wq->lockdep_map = &wq->__lockdep_map;
	lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0);
}

/* unregister @wq's lockdep key, only if @wq uses its own embedded map */
static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
	if (wq->lockdep_map != &wq->__lockdep_map)
		return;

	lockdep_unregister_key(&wq->key);
}

/* free the lock name allocated by wq_init_lockdep(), if there is one */
static void wq_free_lockdep(struct workqueue_struct *wq)
{
	if (wq->lockdep_map != &wq->__lockdep_map)
		return;

	/* ->lock_name aliases ->name when the allocation had failed */
	if (wq->lock_name != wq->name)
		kfree(wq->lock_name);
}
#else
/* !CONFIG_LOCKDEP: the lockdep helpers are no-ops */
static void wq_init_lockdep(struct workqueue_struct *wq)
{
}

static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
}

static void wq_free_lockdep(struct workqueue_struct *wq)
{
}
#endif

/*
 * Free every entry of @nna_ar, including the fallback entry at
 * [nr_node_ids], and reset the slots to %NULL.
 */
static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
{
	int node;

	for_each_node(node) {
		kfree(nna_ar[node]);
		nna_ar[node] = NULL;
	}

	kfree(nna_ar[nr_node_ids]);
	nna_ar[nr_node_ids] = NULL;
}

/* set up a zeroed @nna: default max, zero count, lock and pending list */
static void init_node_nr_active(struct wq_node_nr_active *nna)
{
	nna->max = WQ_DFL_MIN_ACTIVE;
	atomic_set(&nna->nr, 0);
	raw_spin_lock_init(&nna->lock);
	INIT_LIST_HEAD(&nna->pending_pwqs);
}

/*
 * Each node's nr_active counter will be accessed mostly from its own node and
 * should be allocated in the node.
 *
 * Returns 0 on success. On allocation failure, everything allocated so far
 * is freed and -ENOMEM is returned.
 */
static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
{
	struct wq_node_nr_active *nna;
	int node;

	for_each_node(node) {
		nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
		if (!nna)
			goto err_free;
		init_node_nr_active(nna);
		nna_ar[node] = nna;
	}

	/* [nr_node_ids] is used as the fallback */
	nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
	if (!nna)
		goto err_free;
	init_node_nr_active(nna);
	nna_ar[nr_node_ids] = nna;

	return 0;

err_free:
	/*
	 * NOTE(review): this frees every slot, including the ones not filled
	 * in yet - presumably the caller passes in a zeroed array; confirm.
	 */
	free_node_nr_active(nna_ar);
	return -ENOMEM;
}

/* RCU callback which frees a workqueue and what it still owns */
static void rcu_free_wq(struct rcu_head *rcu)
{
	struct workqueue_struct *wq =
		container_of(rcu, struct workqueue_struct, rcu);

	if (wq->flags & WQ_UNBOUND)
		free_node_nr_active(wq->node_nr_active);

	wq_free_lockdep(wq);
	free_percpu(wq->cpu_pwq);
	free_workqueue_attrs(wq->unbound_attrs);
	kfree(wq);
}

/* RCU callback which frees a worker_pool, its worker ida and its attrs */
static void rcu_free_pool(struct rcu_head *rcu)
{
	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);

	ida_destroy(&pool->worker_ida);
	free_workqueue_attrs(pool->attrs);
	kfree(pool);
}

/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
 * safe manner. get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * successfully or not, init_worker_pool().
 *
 * Should be called with wq_pool_mutex held.
 */
static void put_unbound_pool(struct worker_pool *pool)
{
	struct worker *worker;
	LIST_HEAD(cull_list);

	lockdep_assert_held(&wq_pool_mutex);

	if (--pool->refcnt)
		return;

	/* sanity checks */
	if (WARN_ON(!(pool->cpu < 0)) ||
	    WARN_ON(!list_empty(&pool->worklist)))
		return;

	/* release id and unhash */
	if (pool->id >= 0)
		idr_remove(&worker_pool_idr, pool->id);
	hash_del(&pool->hash_node);

	/*
	 * Become the manager and destroy all workers. This prevents
	 * @pool's workers from blocking on attach_mutex. We're the last
	 * manager and @pool gets freed with the flag set.
	 *
	 * Having a concurrent manager is quite unlikely to happen as we can
	 * only get here with
	 *   pwq->refcnt == pool->refcnt == 0
	 * which implies no work queued to the pool, which implies no worker can
	 * become the manager. However a worker could have taken the role of
	 * manager before the refcnts dropped to 0, since maybe_create_worker()
	 * drops pool->lock
	 */
	while (true) {
		rcuwait_wait_event(&manager_wait,
				   !(pool->flags & POOL_MANAGER_ACTIVE),
				   TASK_UNINTERRUPTIBLE);

		mutex_lock(&wq_pool_attach_mutex);
		raw_spin_lock_irq(&pool->lock);
		/* recheck under the lock; leave the loop with both locks held */
		if (!(pool->flags & POOL_MANAGER_ACTIVE)) {
			pool->flags |= POOL_MANAGER_ACTIVE;
			break;
		}
		raw_spin_unlock_irq(&pool->lock);
		mutex_unlock(&wq_pool_attach_mutex);
	}

	/* mark every idle worker dying and collect them on cull_list */
	while ((worker = first_idle_worker(pool)))
		set_worker_dying(worker, &cull_list);
	WARN_ON(pool->nr_workers || pool->nr_idle);
	raw_spin_unlock_irq(&pool->lock);

	detach_dying_workers(&cull_list);

	mutex_unlock(&wq_pool_attach_mutex);

	reap_dying_workers(&cull_list);

	/* shut down the timers */
	timer_delete_sync(&pool->idle_timer);
	cancel_work_sync(&pool->idle_cull_work);
	timer_delete_sync(&pool->mayday_timer);

	/* RCU protected to allow dereferences from get_work_pool() */
	call_rcu(&pool->rcu, rcu_free_pool);
}

/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it. If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
*/
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
	struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_NUMA];
	u32 hash = wqattrs_hash(attrs);
	struct worker_pool *pool;
	int pod, node = NUMA_NO_NODE;

	lockdep_assert_held(&wq_pool_mutex);

	/* do we already have a matching pool? */
	hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
		if (wqattrs_equal(pool->attrs, attrs)) {
			pool->refcnt++;
			return pool;
		}
	}

	/* If __pod_cpumask is contained inside a NUMA pod, that's our node */
	for (pod = 0; pod < pt->nr_pods; pod++) {
		if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) {
			node = pt->pod_node[pod];
			break;
		}
	}

	/* nope, create a new one */
	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, node);
	if (!pool || init_worker_pool(pool) < 0)
		goto fail;

	pool->node = node;
	copy_workqueue_attrs(pool->attrs, attrs);
	/* the copy brought the wq-only fields along, clear them again */
	wqattrs_clear_for_pool(pool->attrs);

	if (worker_pool_assign_id(pool) < 0)
		goto fail;

	/* create and start the initial worker */
	if (wq_online && !create_worker(pool))
		goto fail;

	/* install */
	hash_add(unbound_pool_hash, &pool->hash_node, hash);

	return pool;
fail:
	/* put_unbound_pool() can release a partially initialized pool */
	if (pool)
		put_unbound_pool(pool);
	return NULL;
}

/*
 * Scheduled on pwq_release_worker by put_pwq() when an unbound pwq hits zero
 * refcnt and needs to be destroyed.
 */
static void pwq_release_workfn(struct kthread_work *work)
{
	struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
						  release_work);
	struct workqueue_struct *wq = pwq->wq;
	struct worker_pool *pool = pwq->pool;
	bool is_last = false;

	/*
	 * When @pwq is not linked, it doesn't hold any reference to the
	 * @wq, and @wq is invalid to access.
	 */
	if (!list_empty(&pwq->pwqs_node)) {
		mutex_lock(&wq->mutex);
		list_del_rcu(&pwq->pwqs_node);
		is_last = list_empty(&wq->pwqs);

		/*
		 * For ordered workqueue with a plugged dfl_pwq, restart it now.
		 */
		if (!is_last && (wq->flags & __WQ_ORDERED))
			unplug_oldest_pwq(wq);

		mutex_unlock(&wq->mutex);
	}

	/*
	 * NOTE(review): @wq->flags is read here even when @pwq was not linked,
	 * which the comment above describes as an invalid access - confirm
	 * what keeps @wq alive on that path.
	 */
	if (wq->flags & WQ_UNBOUND) {
		mutex_lock(&wq_pool_mutex);
		put_unbound_pool(pool);
		mutex_unlock(&wq_pool_mutex);
	}

	/* take @pwq off the pending list it may still be on */
	if (!list_empty(&pwq->pending_node)) {
		struct wq_node_nr_active *nna =
			wq_node_nr_active(pwq->wq, pwq->pool->node);

		raw_spin_lock_irq(&nna->lock);
		list_del_init(&pwq->pending_node);
		raw_spin_unlock_irq(&nna->lock);
	}

	kfree_rcu(pwq, rcu);

	/*
	 * If we're the last pwq going away, @wq is already dead and no one
	 * is gonna access it anymore. Schedule RCU free.
	 */
	if (is_last) {
		wq_unregister_lockdep(wq);
		call_rcu(&wq->rcu, rcu_free_wq);
	}
}

/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
		     struct worker_pool *pool)
{
	/* @pwq's address must not have bits set outside WORK_STRUCT_PWQ_MASK */
	BUG_ON((unsigned long)pwq & ~WORK_STRUCT_PWQ_MASK);

	memset(pwq, 0, sizeof(*pwq));

	pwq->pool = pool;
	pwq->wq = wq;
	pwq->flush_color = -1;
	pwq->refcnt = 1;
	INIT_LIST_HEAD(&pwq->inactive_works);
	INIT_LIST_HEAD(&pwq->pending_node);
	INIT_LIST_HEAD(&pwq->pwqs_node);
	INIT_LIST_HEAD(&pwq->mayday_node);
	kthread_init_work(&pwq->release_work, pwq_release_workfn);
}

/* sync @pwq with the current state of its associated wq and link it */
static void link_pwq(struct pool_workqueue *pwq)
{
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq->mutex);

	/* may be called multiple times, ignore if already linked */
	if (!list_empty(&pwq->pwqs_node))
		return;

	/* set the matching work_color */
	pwq->work_color = wq->work_color;

	/* link in @pwq */
	list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs);
}

/* obtain a pool matching @attrs and create a pwq associating the pool and @wq */
static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
					const struct workqueue_attrs *attrs)
{
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	lockdep_assert_held(&wq_pool_mutex);

	pool = get_unbound_pool(attrs);
	if (!pool)
		return NULL;

	pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
	if (!pwq) {
		/* drop the pool reference obtained above */
		put_unbound_pool(pool);
		return NULL;
	}

	init_pwq(pwq, wq, pool);
	return pwq;
}

/* take wq_pool_mutex for applying workqueue attrs */
static void apply_wqattrs_lock(void)
{
	mutex_lock(&wq_pool_mutex);
}

/* release wq_pool_mutex taken by apply_wqattrs_lock() */
static void apply_wqattrs_unlock(void)
{
	mutex_unlock(&wq_pool_mutex);
}

/**
 * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @cpu: the target CPU
 *
 * Calculate the cpumask a workqueue with @attrs should use on the pod which
 * @cpu belongs to. The result is stored in @attrs->__pod_cpumask.
 *
 * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
 * and the pod has online CPUs requested by @attrs, the resulting cpumask is
 * the intersection of the possible CPUs of the pod and @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of the pod stays
 * stable.
 */
static void wq_calc_pod_cpumask(struct workqueue_attrs *attrs, int cpu)
{
	const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
	int pod = pt->cpu_pod[cpu];

	/* calculate possible CPUs in @pod that @attrs wants */
	cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask);
	/* does @pod have any online CPUs @attrs wants? */
	if (!cpumask_intersects(attrs->__pod_cpumask, wq_online_cpumask)) {
		/* no, fall back to the whole @attrs->cpumask */
		cpumask_copy(attrs->__pod_cpumask, attrs->cpumask);
		return;
	}
}

/* install @pwq into @wq and return the old pwq, @cpu < 0 for dfl_pwq */
static struct pool_workqueue *install_unbound_pwq(struct workqueue_struct *wq,
					int cpu, struct pool_workqueue *pwq)
{
	struct pool_workqueue __rcu **slot = unbound_pwq_slot(wq, cpu);
	struct pool_workqueue *old_pwq;

	lockdep_assert_held(&wq_pool_mutex);
	lockdep_assert_held(&wq->mutex);

	/* link_pwq() can handle duplicate calls */
	link_pwq(pwq);

	old_pwq = rcu_access_pointer(*slot);
	rcu_assign_pointer(*slot, pwq);
	return old_pwq;
}

/* context to store the prepared attrs & pwqs before applying */
struct apply_wqattrs_ctx {
	struct workqueue_struct	*wq;		/* target workqueue */
	struct workqueue_attrs	*attrs;		/* attrs to apply */
	struct list_head	list;		/* queued for batching commit */
	struct pool_workqueue	*dfl_pwq;	/* default pwq */
	struct pool_workqueue	*pwq_tbl[];	/* indexed by CPU */
};

/* free the resources after success or abort */
static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
{
	if (ctx) {
		int cpu;

		for_each_possible_cpu(cpu)
			put_pwq_unlocked(ctx->pwq_tbl[cpu]);
		put_pwq_unlocked(ctx->dfl_pwq);

		free_workqueue_attrs(ctx->attrs);

		kfree(ctx);
	}
}

/*
 * allocate the attrs and pwqs for later installation
 *
 * Returns the prepared context on success, ERR_PTR(-EINVAL) if
 * @attrs->affn_scope is out of range and ERR_PTR(-ENOMEM) if any allocation
 * fails.
 */
static struct apply_wqattrs_ctx *
apply_wqattrs_prepare(struct workqueue_struct *wq,
		      const struct workqueue_attrs *attrs,
		      const cpumask_var_t unbound_cpumask)
{
	struct apply_wqattrs_ctx *ctx;
	struct workqueue_attrs *new_attrs;
	int cpu;

	lockdep_assert_held(&wq_pool_mutex);

	if (WARN_ON(attrs->affn_scope < 0 ||
		    attrs->affn_scope >= WQ_AFFN_NR_TYPES))
		return ERR_PTR(-EINVAL);

	/* @ctx carries one pwq_tbl[] slot per possible CPU id */
	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_cpu_ids), GFP_KERNEL);

	new_attrs = alloc_workqueue_attrs();
	if (!ctx || !new_attrs)
		goto out_free;

	/*
	 * If something goes wrong during CPU up/down, we'll fall back to
	 * the default pwq covering whole @attrs->cpumask. Always create
	 * it even if we don't use it immediately.
	 */
	copy_workqueue_attrs(new_attrs, attrs);
	wqattrs_actualize_cpumask(new_attrs, unbound_cpumask);
	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
	ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
	if (!ctx->dfl_pwq)
		goto out_free;

	for_each_possible_cpu(cpu) {
		if (new_attrs->ordered) {
			/* ordered: every CPU shares dfl_pwq, one ref per slot */
			ctx->dfl_pwq->refcnt++;
			ctx->pwq_tbl[cpu] = ctx->dfl_pwq;
		} else {
			wq_calc_pod_cpumask(new_attrs, cpu);
			ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs);
			if (!ctx->pwq_tbl[cpu])
				goto out_free;
		}
	}

	/* save the user configured attrs and sanitize it. */
	copy_workqueue_attrs(new_attrs, attrs);
	cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
	ctx->attrs = new_attrs;

	/*
	 * For initialized ordered workqueues, there should only be one pwq
	 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution
	 * of newly queued work items until execution of older work items in
	 * the old pwq's have completed.
	 */
	if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))
		ctx->dfl_pwq->plugged = true;

	ctx->wq = wq;
	return ctx;

out_free:
	/*
	 * ctx->attrs is only set right before the successful return, so
	 * @new_attrs is not freed a second time by apply_wqattrs_cleanup().
	 */
	free_workqueue_attrs(new_attrs);
	apply_wqattrs_cleanup(ctx);
	return ERR_PTR(-ENOMEM);
}

/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
{
	int cpu;

	/* all pwqs have been created successfully, let's install'em */
	mutex_lock(&ctx->wq->mutex);

	copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);

	/* save the previous pwqs and install the new ones */
	for_each_possible_cpu(cpu)
		ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu,
							ctx->pwq_tbl[cpu]);
	ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq);

	/* update node_nr_active->max */
	wq_update_node_max_active(ctx->wq, -1);

	/* rescuer needs to respect wq cpumask changes */
	if (ctx->wq->rescuer)
		set_cpus_allowed_ptr(ctx->wq->rescuer->task,
				     unbound_effective_cpumask(ctx->wq));

	mutex_unlock(&ctx->wq->mutex);
}

/*
 * Prepare, commit and clean up an attrs change for @wq. Returns 0 on
 * success, -EINVAL with a warning if @wq is not unbound, or the error from
 * apply_wqattrs_prepare().
 */
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
					const struct workqueue_attrs *attrs)
{
	struct apply_wqattrs_ctx *ctx;

	/* only unbound workqueues can change attributes */
	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
		return -EINVAL;

	ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	/* the ctx has been prepared successfully, let's commit it */
	apply_wqattrs_commit(ctx);
	/* after the commit @ctx holds the old pwqs, which are put here */
	apply_wqattrs_cleanup(ctx);

	return 0;
}

/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq. Unless disabled, this function maps
 * a separate pwq to each CPU pod with possible CPUs in @attrs->cpumask so that
 * work items are affine to the pod it was issued on. Older pwqs are released as
 * in-flight work items finish. Note that a work item which repeatedly requeues
 * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
int apply_workqueue_attrs(struct workqueue_struct *wq,
			  const struct workqueue_attrs *attrs)
{
	int ret;

	mutex_lock(&wq_pool_mutex);
	ret = apply_workqueue_attrs_locked(wq, attrs);
	mutex_unlock(&wq_pool_mutex);

	return ret;
}

/**
 * unbound_wq_update_pwq - update a pwq slot for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU to update the pwq slot for
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED. @cpu is in the same pod of the CPU being hot[un]plugged.
 *
 *
 * If pod affinity can't be adjusted due to memory allocation failure, it falls
 * back to @wq->dfl_pwq which may not be optimal but is always correct.
* * Note that when the last allowed CPU of a pod goes offline for a workqueue * with a cpumask spanning multiple pods, the workers which were already * executing the work items for the workqueue will lose their CPU affinity and * may execute on any CPU. This is similar to how per-cpu workqueues behave on * CPU_DOWN. If a workqueue user wants strict affinity, it's the user's * responsibility to flush the work item from CPU_DOWN_PREPARE. */ static void unbound_wq_update_pwq(struct workqueue_struct *wq, int cpu) { struct pool_workqueue *old_pwq = NULL, *pwq; struct workqueue_attrs *target_attrs; lockdep_assert_held(&wq_pool_mutex); if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) return; /* * We don't wanna alloc/free wq_attrs for each wq for each CPU. * Let's use a preallocated one. The following buf is protected by * CPU hotplug exclusion. */ target_attrs = unbound_wq_update_pwq_attrs_buf; copy_workqueue_attrs(target_attrs, wq->unbound_attrs); wqattrs_actualize_cpumask(target_attrs, wq_unbound_cpumask); /* nothing to do if the target cpumask matches the current pwq */ wq_calc_pod_cpumask(target_attrs, cpu); if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) return; /* create a new pwq */ pwq = alloc_unbound_pwq(wq, target_attrs); if (!pwq) { pr_warn("workqueue: allocation failed while updating CPU pod affinity of \"%s\"\n", wq->name); goto use_dfl_pwq; } /* Install the new pwq. 
*/ mutex_lock(&wq->mutex); old_pwq = install_unbound_pwq(wq, cpu, pwq); goto out_unlock; use_dfl_pwq: mutex_lock(&wq->mutex); pwq = unbound_pwq(wq, -1); raw_spin_lock_irq(&pwq->pool->lock); get_pwq(pwq); raw_spin_unlock_irq(&pwq->pool->lock); old_pwq = install_unbound_pwq(wq, cpu, pwq); out_unlock: mutex_unlock(&wq->mutex); put_pwq_unlocked(old_pwq); } static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; int cpu, ret; lockdep_assert_held(&wq_pool_mutex); wq->cpu_pwq = alloc_percpu(struct pool_workqueue *); if (!wq->cpu_pwq) goto enomem; if (!(wq->flags & WQ_UNBOUND)) { struct worker_pool __percpu *pools; if (wq->flags & WQ_BH) pools = bh_worker_pools; else pools = cpu_worker_pools; for_each_possible_cpu(cpu) { struct pool_workqueue **pwq_p; struct worker_pool *pool; pool = &(per_cpu_ptr(pools, cpu)[highpri]); pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu); *pwq_p = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); if (!*pwq_p) goto enomem; init_pwq(*pwq_p, wq, pool); mutex_lock(&wq->mutex); link_pwq(*pwq_p); mutex_unlock(&wq->mutex); } return 0; } if (wq->flags & __WQ_ORDERED) { struct pool_workqueue *dfl_pwq; ret = apply_workqueue_attrs_locked(wq, ordered_wq_attrs[highpri]); /* there should only be single pwq for ordering guarantee */ dfl_pwq = rcu_access_pointer(wq->dfl_pwq); WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || wq->pwqs.prev != &dfl_pwq->pwqs_node), "ordering guarantee broken for workqueue %s\n", wq->name); } else { ret = apply_workqueue_attrs_locked(wq, unbound_std_wq_attrs[highpri]); } return ret; enomem: if (wq->cpu_pwq) { for_each_possible_cpu(cpu) { struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); if (pwq) kmem_cache_free(pwq_cache, pwq); } free_percpu(wq->cpu_pwq); wq->cpu_pwq = NULL; } return -ENOMEM; } static int wq_clamp_max_active(int max_active, unsigned int flags, const char *name) { if (max_active < 1 || max_active > WQ_MAX_ACTIVE) pr_warn("workqueue: max_active %d requested 
for %s is out of range, clamping between %d and %d\n", max_active, name, 1, WQ_MAX_ACTIVE); return clamp_val(max_active, 1, WQ_MAX_ACTIVE); } /* * Workqueues which may be used during memory reclaim should have a rescuer * to guarantee forward progress. */ static int init_rescuer(struct workqueue_struct *wq) { struct worker *rescuer; char id_buf[WORKER_ID_LEN]; int ret; lockdep_assert_held(&wq_pool_mutex); if (!(wq->flags & WQ_MEM_RECLAIM)) return 0; rescuer = alloc_worker(NUMA_NO_NODE); if (!rescuer) { pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n", wq->name); return -ENOMEM; } rescuer->rescue_wq = wq; format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL); rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); if (IS_ERR(rescuer->task)) { ret = PTR_ERR(rescuer->task); pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe", wq->name, ERR_PTR(ret)); kfree(rescuer); return ret; } wq->rescuer = rescuer; if (wq->flags & WQ_UNBOUND) kthread_bind_mask(rescuer->task, unbound_effective_cpumask(wq)); else kthread_bind_mask(rescuer->task, cpu_possible_mask); wake_up_process(rescuer->task); return 0; } /** * wq_adjust_max_active - update a wq's max_active to the current setting * @wq: target workqueue * * If @wq isn't freezing, set @wq->max_active to the saved_max_active and * activate inactive work items accordingly. If @wq is freezing, clear * @wq->max_active to zero. */ static void wq_adjust_max_active(struct workqueue_struct *wq) { bool activated; int new_max, new_min; lockdep_assert_held(&wq->mutex); if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { new_max = 0; new_min = 0; } else { new_max = wq->saved_max_active; new_min = wq->saved_min_active; } if (wq->max_active == new_max && wq->min_active == new_min) return; /* * Update @wq->max/min_active and then kick inactive work items if more * active work items are allowed. 
This doesn't break work item ordering * because new work items are always queued behind existing inactive * work items if there are any. */ WRITE_ONCE(wq->max_active, new_max); WRITE_ONCE(wq->min_active, new_min); if (wq->flags & WQ_UNBOUND) wq_update_node_max_active(wq, -1); if (new_max == 0) return; /* * Round-robin through pwq's activating the first inactive work item * until max_active is filled. */ do { struct pool_workqueue *pwq; activated = false; for_each_pwq(pwq, wq) { unsigned long irq_flags; /* can be called during early boot w/ irq disabled */ raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); if (pwq_activate_first_inactive(pwq, true)) { activated = true; kick_pool(pwq->pool); } raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); } } while (activated); } __printf(1, 0) static struct workqueue_struct *__alloc_workqueue(const char *fmt, unsigned int flags, int max_active, va_list args) { struct workqueue_struct *wq; size_t wq_size; int name_len; if (flags & WQ_BH) { if (WARN_ON_ONCE(flags & ~__WQ_BH_ALLOWS)) return NULL; if (WARN_ON_ONCE(max_active)) return NULL; } /* see the comment above the definition of WQ_POWER_EFFICIENT */ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) flags |= WQ_UNBOUND; /* allocate wq and format name */ if (flags & WQ_UNBOUND) wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1); else wq_size = sizeof(*wq); wq = kzalloc_noprof(wq_size, GFP_KERNEL); if (!wq) return NULL; if (flags & WQ_UNBOUND) { wq->unbound_attrs = alloc_workqueue_attrs_noprof(); if (!wq->unbound_attrs) goto err_free_wq; } name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); if (name_len >= WQ_NAME_LEN) pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name); if (flags & WQ_BH) { /* * BH workqueues always share a single execution context per CPU * and don't impose any max_active limit. 
*/ max_active = INT_MAX; } else { max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); } /* init wq */ wq->flags = flags; wq->max_active = max_active; wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); wq->saved_max_active = wq->max_active; wq->saved_min_active = wq->min_active; mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->pwqs); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); INIT_LIST_HEAD(&wq->maydays); INIT_LIST_HEAD(&wq->list); if (flags & WQ_UNBOUND) { if (alloc_node_nr_active(wq->node_nr_active) < 0) goto err_free_wq; } /* * wq_pool_mutex protects the workqueues list, allocations of PWQs, * and the global freeze state. */ apply_wqattrs_lock(); if (alloc_and_link_pwqs(wq) < 0) goto err_unlock_free_node_nr_active; mutex_lock(&wq->mutex); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); list_add_tail_rcu(&wq->list, &workqueues); if (wq_online && init_rescuer(wq) < 0) goto err_unlock_destroy; apply_wqattrs_unlock(); if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) goto err_destroy; return wq; err_unlock_free_node_nr_active: apply_wqattrs_unlock(); /* * Failed alloc_and_link_pwqs() may leave pending pwq->release_work, * flushing the pwq_release_worker ensures that the pwq_release_workfn() * completes before calling kfree(wq). */ if (wq->flags & WQ_UNBOUND) { kthread_flush_worker(pwq_release_worker); free_node_nr_active(wq->node_nr_active); } err_free_wq: free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL; err_unlock_destroy: apply_wqattrs_unlock(); err_destroy: destroy_workqueue(wq); return NULL; } __printf(1, 4) struct workqueue_struct *alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...) 
{ struct workqueue_struct *wq; va_list args; va_start(args, max_active); wq = __alloc_workqueue(fmt, flags, max_active, args); va_end(args); if (!wq) return NULL; wq_init_lockdep(wq); return wq; } EXPORT_SYMBOL_GPL(alloc_workqueue_noprof); #ifdef CONFIG_LOCKDEP __printf(1, 5) struct workqueue_struct * alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, struct lockdep_map *lockdep_map, ...) { struct workqueue_struct *wq; va_list args; va_start(args, lockdep_map); wq = __alloc_workqueue(fmt, flags, max_active, args); va_end(args); if (!wq) return NULL; wq->lockdep_map = lockdep_map; return wq; } EXPORT_SYMBOL_GPL(alloc_workqueue_lockdep_map); #endif static bool pwq_busy(struct pool_workqueue *pwq) { int i; for (i = 0; i < WORK_NR_COLORS; i++) if (pwq->nr_in_flight[i]) return true; if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) return true; if (!pwq_is_empty(pwq)) return true; return false; } /** * destroy_workqueue - safely terminate a workqueue * @wq: target workqueue * * Safely destroy a workqueue. All work currently pending will be done first. * * This function does NOT guarantee that non-pending work that has been * submitted with queue_delayed_work() and similar functions will be done * before destroying the workqueue. The fundamental problem is that, currently, * the workqueue has no way of accessing non-pending delayed_work. delayed_work * is only linked on the timer-side. All delayed_work must, therefore, be * canceled before calling this function. * * TODO: It would be better if the problem described above wouldn't exist and * destroy_workqueue() would cleanly cancel all pending and non-pending * delayed_work. */ void destroy_workqueue(struct workqueue_struct *wq) { struct pool_workqueue *pwq; int cpu; /* * Remove it from sysfs first so that sanity check failure doesn't * lead to sysfs name conflicts. 
*/ workqueue_sysfs_unregister(wq); /* mark the workqueue destruction is in progress */ mutex_lock(&wq->mutex); wq->flags |= __WQ_DESTROYING; mutex_unlock(&wq->mutex); /* drain it before proceeding with destruction */ drain_workqueue(wq); /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ if (wq->rescuer) { struct worker *rescuer = wq->rescuer; /* this prevents new queueing */ raw_spin_lock_irq(&wq_mayday_lock); wq->rescuer = NULL; raw_spin_unlock_irq(&wq_mayday_lock); /* rescuer will empty maydays list before exiting */ kthread_stop(rescuer->task); kfree(rescuer); } /* * Sanity checks - grab all the locks so that we wait for all * in-flight operations which may do put_pwq(). */ mutex_lock(&wq_pool_mutex); mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { raw_spin_lock_irq(&pwq->pool->lock); if (WARN_ON(pwq_busy(pwq))) { pr_warn("%s: %s has the following busy pwq\n", __func__, wq->name); show_pwq(pwq); raw_spin_unlock_irq(&pwq->pool->lock); mutex_unlock(&wq->mutex); mutex_unlock(&wq_pool_mutex); show_one_workqueue(wq); return; } raw_spin_unlock_irq(&pwq->pool->lock); } mutex_unlock(&wq->mutex); /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ list_del_rcu(&wq->list); mutex_unlock(&wq_pool_mutex); /* * We're the sole accessor of @wq. Directly access cpu_pwq and dfl_pwq * to put the base refs. @wq will be auto-destroyed from the last * pwq_put. RCU read lock prevents @wq from going away from under us. */ rcu_read_lock(); for_each_possible_cpu(cpu) { put_pwq_unlocked(unbound_pwq(wq, cpu)); RCU_INIT_POINTER(*unbound_pwq_slot(wq, cpu), NULL); } put_pwq_unlocked(unbound_pwq(wq, -1)); RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(destroy_workqueue); /** * workqueue_set_max_active - adjust max_active of a workqueue * @wq: target workqueue * @max_active: new max_active value. * * Set max_active of @wq to @max_active. See the alloc_workqueue() function * comment. 
* * CONTEXT: * Don't call from IRQ context. */ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { /* max_active doesn't mean anything for BH workqueues */ if (WARN_ON(wq->flags & WQ_BH)) return; /* disallow meddling with max_active for ordered workqueues */ if (WARN_ON(wq->flags & __WQ_ORDERED)) return; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); mutex_lock(&wq->mutex); wq->saved_max_active = max_active; if (wq->flags & WQ_UNBOUND) wq->saved_min_active = min(wq->saved_min_active, max_active); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(workqueue_set_max_active); /** * workqueue_set_min_active - adjust min_active of an unbound workqueue * @wq: target unbound workqueue * @min_active: new min_active value * * Set min_active of an unbound workqueue. Unlike other types of workqueues, an * unbound workqueue is not guaranteed to be able to process max_active * interdependent work items. Instead, an unbound workqueue is guaranteed to be * able to process min_active number of interdependent work items which is * %WQ_DFL_MIN_ACTIVE by default. * * Use this function to adjust the min_active value between 0 and the current * max_active. */ void workqueue_set_min_active(struct workqueue_struct *wq, int min_active) { /* min_active is only meaningful for non-ordered unbound workqueues */ if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) != WQ_UNBOUND)) return; mutex_lock(&wq->mutex); wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } /** * current_work - retrieve %current task's work struct * * Determine if %current task is a workqueue worker and what it's working on. * Useful to find out the context that the %current task is running in. * * Return: work struct if %current task is a workqueue worker, %NULL otherwise. */ struct work_struct *current_work(void) { struct worker *worker = current_wq_worker(); return worker ? 
worker->current_work : NULL; } EXPORT_SYMBOL(current_work); /** * current_is_workqueue_rescuer - is %current workqueue rescuer? * * Determine whether %current is a workqueue rescuer. Can be used from * work functions to determine whether it's being run off the rescuer task. * * Return: %true if %current is a workqueue rescuer. %false otherwise. */ bool current_is_workqueue_rescuer(void) { struct worker *worker = current_wq_worker(); return worker && worker->rescue_wq; } /** * workqueue_congested - test whether a workqueue is congested * @cpu: CPU in question * @wq: target workqueue * * Test whether @wq's cpu workqueue for @cpu is congested. There is * no synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. * * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU. * * With the exception of ordered workqueues, all workqueues have per-cpu * pool_workqueues, each with its own congested state. A workqueue being * congested on one CPU doesn't mean that the workqueue is contested on any * other CPUs. * * Return: * %true if congested, %false otherwise. */ bool workqueue_congested(int cpu, struct workqueue_struct *wq) { struct pool_workqueue *pwq; bool ret; rcu_read_lock(); preempt_disable(); if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); ret = !list_empty(&pwq->inactive_works); preempt_enable(); rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(workqueue_congested); /** * work_busy - test whether a work is currently pending or running * @work: the work to be tested * * Test whether @work is currently pending or running. There is no * synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. * * Return: * OR'd bitmask of WORK_BUSY_* bits. 
*/ unsigned int work_busy(struct work_struct *work) { struct worker_pool *pool; unsigned long irq_flags; unsigned int ret = 0; if (work_pending(work)) ret |= WORK_BUSY_PENDING; rcu_read_lock(); pool = get_work_pool(work); if (pool) { raw_spin_lock_irqsave(&pool->lock, irq_flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(work_busy); /** * set_worker_desc - set description for the current work item * @fmt: printf-style format string * @...: arguments for the format string * * This function can be called by a running work function to describe what * the work item is about. If the worker task gets dumped, this * information will be printed out together to help debugging. The * description can be at most WORKER_DESC_LEN including the trailing '\0'. */ void set_worker_desc(const char *fmt, ...) { struct worker *worker = current_wq_worker(); va_list args; if (worker) { va_start(args, fmt); vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); va_end(args); } } EXPORT_SYMBOL_GPL(set_worker_desc); /** * print_worker_info - print out worker information and description * @log_lvl: the log level to use when printing * @task: target task * * If @task is a worker and currently executing a work item, print out the * name of the workqueue being serviced and worker description set with * set_worker_desc() by the currently executing work item. * * This function can be safely called on any task as long as the * task_struct itself is accessible. While safe, this function isn't * synchronized and may print out mixups or garbages of limited length. 
*/ void print_worker_info(const char *log_lvl, struct task_struct *task) { work_func_t *fn = NULL; char name[WQ_NAME_LEN] = { }; char desc[WORKER_DESC_LEN] = { }; struct pool_workqueue *pwq = NULL; struct workqueue_struct *wq = NULL; struct worker *worker; if (!(task->flags & PF_WQ_WORKER)) return; /* * This function is called without any synchronization and @task * could be in any state. Be careful with dereferences. */ worker = kthread_probe_data(task); /* * Carefully copy the associated workqueue's workfn, name and desc. * Keep the original last '\0' in case the original is garbage. */ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { printk("%sWorkqueue: %s %ps", log_lvl, name, fn); if (strcmp(name, desc)) pr_cont(" (%s)", desc); pr_cont("\n"); } } static void pr_cont_pool_info(struct worker_pool *pool) { pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); if (pool->node != NUMA_NO_NODE) pr_cont(" node=%d", pool->node); pr_cont(" flags=0x%x", pool->flags); if (pool->flags & POOL_BH) pr_cont(" bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else pr_cont(" nice=%d", pool->attrs->nice); } static void pr_cont_worker_id(struct worker *worker) { struct worker_pool *pool = worker->pool; if (pool->flags & WQ_BH) pr_cont("bh%s", pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); else pr_cont("%d%s", task_pid_nr(worker->task), worker->rescue_wq ? 
"(RESCUER)" : ""); } struct pr_cont_work_struct { bool comma; work_func_t func; long ctr; }; static void pr_cont_work_flush(bool comma, work_func_t func, struct pr_cont_work_struct *pcwsp) { if (!pcwsp->ctr) goto out_record; if (func == pcwsp->func) { pcwsp->ctr++; return; } if (pcwsp->ctr == 1) pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func); else pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func); pcwsp->ctr = 0; out_record: if ((long)func == -1L) return; pcwsp->comma = comma; pcwsp->func = func; pcwsp->ctr = 1; } static void pr_cont_work(bool comma, struct work_struct *work, struct pr_cont_work_struct *pcwsp) { if (work->func == wq_barrier_func) { struct wq_barrier *barr; barr = container_of(work, struct wq_barrier, work); pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); pr_cont("%s BAR(%d)", comma ? "," : "", task_pid_nr(barr->task)); } else { if (!comma) pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); pr_cont_work_flush(comma, work->func, pcwsp); } } static void show_pwq(struct pool_workqueue *pwq) { struct pr_cont_work_struct pcws = { .ctr = 0, }; struct worker_pool *pool = pwq->pool; struct work_struct *work; struct worker *worker; bool has_in_flight = false, has_pending = false; int bkt; pr_info(" pwq %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" active=%d refcnt=%d%s\n", pwq->nr_active, pwq->refcnt, !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (worker->current_pwq == pwq) { has_in_flight = true; break; } } if (has_in_flight) { bool comma = false; pr_info(" in-flight:"); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (worker->current_pwq != pwq) continue; pr_cont(" %s", comma ? 
"," : ""); pr_cont_worker_id(worker); pr_cont(":%ps", worker->current_func); list_for_each_entry(work, &worker->scheduled, entry) pr_cont_work(false, work, &pcws); pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); comma = true; } pr_cont("\n"); } list_for_each_entry(work, &pool->worklist, entry) { if (get_work_pwq(work) == pwq) { has_pending = true; break; } } if (has_pending) { bool comma = false; pr_info(" pending:"); list_for_each_entry(work, &pool->worklist, entry) { if (get_work_pwq(work) != pwq) continue; pr_cont_work(comma, work, &pcws); comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); } pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); pr_cont("\n"); } if (!list_empty(&pwq->inactive_works)) { bool comma = false; pr_info(" inactive:"); list_for_each_entry(work, &pwq->inactive_works, entry) { pr_cont_work(comma, work, &pcws); comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); } pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); pr_cont("\n"); } } /** * show_one_workqueue - dump state of specified workqueue * @wq: workqueue whose state will be printed */ void show_one_workqueue(struct workqueue_struct *wq) { struct pool_workqueue *pwq; bool idle = true; unsigned long irq_flags; for_each_pwq(pwq, wq) { if (!pwq_is_empty(pwq)) { idle = false; break; } } if (idle) /* Nothing to print for idle workqueue */ return; pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); for_each_pwq(pwq, wq) { raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); if (!pwq_is_empty(pwq)) { /* * Defer printing to avoid deadlocks in console * drivers that queue work while holding locks * also taken in their write paths. */ printk_deferred_enter(); show_pwq(pwq); printk_deferred_exit(); } raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_all_workqueues(). Avoid triggering * hard lockup. 
*/ touch_nmi_watchdog(); } } /** * show_one_worker_pool - dump state of specified worker pool * @pool: worker pool whose state will be printed */ static void show_one_worker_pool(struct worker_pool *pool) { struct worker *worker; bool first = true; unsigned long irq_flags; unsigned long hung = 0; raw_spin_lock_irqsave(&pool->lock, irq_flags); if (pool->nr_workers == pool->nr_idle) goto next_pool; /* How long the first pending work is waiting for a worker. */ if (!list_empty(&pool->worklist)) hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; /* * Defer printing to avoid deadlocks in console drivers that * queue work while holding locks also taken in their write * paths. */ printk_deferred_enter(); pr_info("pool %d:", pool->id); pr_cont_pool_info(pool); pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); if (pool->manager) pr_cont(" manager: %d", task_pid_nr(pool->manager->task)); list_for_each_entry(worker, &pool->idle_list, entry) { pr_cont(" %s", first ? "idle: " : ""); pr_cont_worker_id(worker); first = false; } pr_cont("\n"); printk_deferred_exit(); next_pool: raw_spin_unlock_irqrestore(&pool->lock, irq_flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_all_workqueues(). Avoid triggering * hard lockup. */ touch_nmi_watchdog(); } /** * show_all_workqueues - dump workqueue state * * Called from a sysrq handler and prints out all busy workqueues and pools. */ void show_all_workqueues(void) { struct workqueue_struct *wq; struct worker_pool *pool; int pi; rcu_read_lock(); pr_info("Showing busy workqueues and worker pools:\n"); list_for_each_entry_rcu(wq, &workqueues, list) show_one_workqueue(wq); for_each_pool(pool, pi) show_one_worker_pool(pool); rcu_read_unlock(); } /** * show_freezable_workqueues - dump freezable workqueue state * * Called from try_to_freeze_tasks() and prints out all freezable workqueues * still busy. 
*/ void show_freezable_workqueues(void) { struct workqueue_struct *wq; rcu_read_lock(); pr_info("Showing freezable workqueues that are still busy:\n"); list_for_each_entry_rcu(wq, &workqueues, list) { if (!(wq->flags & WQ_FREEZABLE)) continue; show_one_workqueue(wq); } rcu_read_unlock(); } /* used to show worker information through /proc/PID/{comm,stat,status} */ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) { /* stabilize PF_WQ_WORKER and worker pool association */ mutex_lock(&wq_pool_attach_mutex); if (task->flags & PF_WQ_WORKER) { struct worker *worker = kthread_data(task); struct worker_pool *pool = worker->pool; int off; off = format_worker_id(buf, size, worker, pool); if (pool) { raw_spin_lock_irq(&pool->lock); /* * ->desc tracks information (wq name or * set_worker_desc()) for the latest execution. If * current, prepend '+', otherwise '-'. */ if (worker->desc[0] != '\0') { if (worker->current_work) scnprintf(buf + off, size - off, "+%s", worker->desc); else scnprintf(buf + off, size - off, "-%s", worker->desc); } raw_spin_unlock_irq(&pool->lock); } } else { strscpy(buf, task->comm, size); } mutex_unlock(&wq_pool_attach_mutex); } #ifdef CONFIG_SMP /* * CPU hotplug. * * There are two challenges in supporting CPU hotplug. Firstly, there * are a lot of assumptions on strong associations among work, pwq and * pool which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, * worker pools serve mix of short, long and very long running works making * blocked draining impractical. * * This is solved by allowing the pools to be disassociated from the CPU * running as an unbound one and allowing it to be reattached later if the * cpu comes back online. 
*/
static void unbind_workers(int cpu)
{
	struct worker_pool *pool;
	struct worker *worker;

	for_each_cpu_worker_pool(pool, cpu) {
		mutex_lock(&wq_pool_attach_mutex);
		raw_spin_lock_irq(&pool->lock);

		/*
		 * We've blocked all attach/detach operations. Make all workers
		 * unbound and set DISASSOCIATED. Before this, all workers
		 * must be on the cpu. After this, they may become diasporas.
		 * And the preemption disabled section in their sched callbacks
		 * are guaranteed to see WORKER_UNBOUND since the code here
		 * is on the same cpu.
		 */
		for_each_pool_worker(worker, pool)
			worker->flags |= WORKER_UNBOUND;

		pool->flags |= POOL_DISASSOCIATED;

		/*
		 * The handling of nr_running in sched callbacks are disabled
		 * now. Zap nr_running. After this, nr_running stays zero and
		 * need_more_worker() and keep_working() are always true as
		 * long as the worklist is not empty. This pool now behaves as
		 * an unbound (in terms of concurrency management) pool which
		 * are served by workers tied to the pool.
		 */
		pool->nr_running = 0;

		/*
		 * With concurrency management just turned off, a busy
		 * worker blocking could lead to lengthy stalls. Kick off
		 * unbound chain execution of currently pending work items.
		 */
		kick_pool(pool);

		raw_spin_unlock_irq(&pool->lock);

		/* done after dropping pool->lock, still under the attach mutex */
		for_each_pool_worker(worker, pool)
			unbind_worker(worker);

		mutex_unlock(&wq_pool_attach_mutex);
	}
}

/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online. Rebind all workers to the CPU.
 *
 * Must be called with wq_pool_attach_mutex held.
 */
static void rebind_workers(struct worker_pool *pool)
{
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);

	/*
	 * Restore CPU affinity of all workers. As all idle workers should
	 * be on the run-queue of the associated CPU before any local
	 * wake-ups for concurrency management happen, restore CPU affinity
	 * of all workers first and then clear UNBOUND. As we're called
	 * from CPU_ONLINE, the following shouldn't fail.
	 */
	for_each_pool_worker(worker, pool) {
		kthread_set_per_cpu(worker->task, pool->cpu);
		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
						  pool_allowed_cpus(pool)) < 0);
	}

	raw_spin_lock_irq(&pool->lock);

	pool->flags &= ~POOL_DISASSOCIATED;

	for_each_pool_worker(worker, pool) {
		unsigned int worker_flags = worker->flags;

		/*
		 * We want to clear UNBOUND but can't directly call
		 * worker_clr_flags() or adjust nr_running. Atomically
		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
		 * @worker will clear REBOUND using worker_clr_flags() when
		 * it initiates the next execution cycle thus restoring
		 * concurrency management. Note that when or whether
		 * @worker clears REBOUND doesn't affect correctness.
		 *
		 * WRITE_ONCE() is necessary because @worker->flags may be
		 * tested without holding any lock in
		 * wq_worker_running(). Without it, NOT_RUNNING test may
		 * fail incorrectly leading to premature concurrency
		 * management operations.
		 */
		WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
		worker_flags |= WORKER_REBOUND;
		worker_flags &= ~WORKER_UNBOUND;
		WRITE_ONCE(worker->flags, worker_flags);
	}

	raw_spin_unlock_irq(&pool->lock);
}

/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs. When a worker of such pool get scheduled, the scheduler resets
 * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any
 * online CPU before, cpus_allowed of all its workers should be restored.
 *
 * Must be called with wq_pool_attach_mutex held.
 */
static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
	/* scratch mask kept static, i.e. off the stack */
	static cpumask_t cpumask;
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);

	/* is @cpu allowed for @pool? */
	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
		return;

	cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);

	/* as we're called from CPU_ONLINE, the following shouldn't fail */
	for_each_pool_worker(worker, pool)
		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
}

/*
 * Make sure each per-cpu worker pool of @cpu has at least one worker.
 *
 * Pools that already have workers are left alone. Returns 0 on success or
 * -ENOMEM if create_worker() fails.
 */
int workqueue_prepare_cpu(unsigned int cpu)
{
	struct worker_pool *pool;

	for_each_cpu_worker_pool(pool, cpu) {
		if (pool->nr_workers)
			continue;
		if (!create_worker(pool))
			return -ENOMEM;
	}
	return 0;
}

/*
 * Bring workqueue state up to date for @cpu coming online.
 *
 * Marks @cpu in wq_online_cpumask, rebinds the workers of @cpu's own pools,
 * restores the cpumask of unbound pools' workers and then refreshes the pwqs
 * of every workqueue that has unbound_attrs for the CPUs in @cpu's pod.
 * Runs under wq_pool_mutex and always returns 0.
 */
int workqueue_online_cpu(unsigned int cpu)
{
	struct worker_pool *pool;
	struct workqueue_struct *wq;
	int pi;

	mutex_lock(&wq_pool_mutex);

	cpumask_set_cpu(cpu, wq_online_cpumask);

	for_each_pool(pool, pi) {
		/* BH pools aren't affected by hotplug */
		if (pool->flags & POOL_BH)
			continue;

		mutex_lock(&wq_pool_attach_mutex);
		if (pool->cpu == cpu)
			rebind_workers(pool);
		else if (pool->cpu < 0)
			restore_unbound_workers_cpumask(pool, cpu);
		mutex_unlock(&wq_pool_attach_mutex);
	}

	/* update pod affinity of unbound workqueues */
	list_for_each_entry(wq, &workqueues, list) {
		struct workqueue_attrs *attrs = wq->unbound_attrs;

		if (attrs) {
			const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
			int tcpu;

			for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
				unbound_wq_update_pwq(wq, tcpu);

			mutex_lock(&wq->mutex);
			wq_update_node_max_active(wq, -1);
			mutex_unlock(&wq->mutex);
		}
	}

	mutex_unlock(&wq_pool_mutex);
	return 0;
}

/*
 * Bring workqueue state up to date for @cpu going offline.
 *
 * Has to run on @cpu itself; returns -1 (after a warning) otherwise. Unbinds
 * the workers of @cpu's pools, clears @cpu from wq_online_cpumask and then
 * refreshes the pwqs of every workqueue that has unbound_attrs for the CPUs
 * in @cpu's pod. Returns 0 on success.
 */
int workqueue_offline_cpu(unsigned int cpu)
{
	struct workqueue_struct *wq;

	/* unbinding per-cpu workers should happen on the local CPU */
	if (WARN_ON(cpu != smp_processor_id()))
		return -1;

	unbind_workers(cpu);

	/* update pod affinity of unbound workqueues */
	mutex_lock(&wq_pool_mutex);

	cpumask_clear_cpu(cpu, wq_online_cpumask);

	list_for_each_entry(wq, &workqueues, list) {
		struct workqueue_attrs *attrs = wq->unbound_attrs;

		if (attrs) {
			const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
			int tcpu;

			for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]])
				unbound_wq_update_pwq(wq, tcpu);

			mutex_lock(&wq->mutex);
			wq_update_node_max_active(wq, cpu);
			mutex_unlock(&wq->mutex);
		}
	}
	mutex_unlock(&wq_pool_mutex);

	return 0;
}

/* on-stack request used by work_on_cpu_key() to run @fn(@arg) in a worker */
struct work_for_cpu {
	struct work_struct work;
	long (*fn)(void *);
	void *arg;
	long ret;	/* value returned by @fn, filled in by work_for_cpu_fn() */
};

/* work function: call the requested function and stash its return value */
static void work_for_cpu_fn(struct work_struct *work)
{
	struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);

	wfc->ret = wfc->fn(wfc->arg);
}

/**
 * work_on_cpu_key - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 * @key: The lock class key for lock debugging purposes
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 *
 * The work item lives on the caller's stack; this function waits for it with
 * flush_work() before returning.
 *
 * Return: The value @fn returns.
 */
long work_on_cpu_key(int cpu, long (*fn)(void *),
		     void *arg, struct lock_class_key *key)
{
	struct work_for_cpu wfc = { .fn = fn, .arg = arg };

	INIT_WORK_ONSTACK_KEY(&wfc.work, work_for_cpu_fn, key);
	schedule_work_on(cpu, &wfc.work);
	flush_work(&wfc.work);
	destroy_work_on_stack(&wfc.work);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_key);
#endif /* CONFIG_SMP */

#ifdef CONFIG_FREEZER

/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues. After this function returns, all freezable
 * workqueues will queue new works to their inactive_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
void freeze_workqueues_begin(void)
{
	struct workqueue_struct *wq;

	mutex_lock(&wq_pool_mutex);

	WARN_ON_ONCE(workqueue_freezing);
	workqueue_freezing = true;

	/* let every workqueue re-evaluate max_active against the new state */
	list_for_each_entry(wq, &workqueues, list) {
		mutex_lock(&wq->mutex);
		wq_adjust_max_active(wq);
		mutex_unlock(&wq->mutex);
	}

	mutex_unlock(&wq_pool_mutex);
}

/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete. This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
* * CONTEXT: * Grabs and releases wq_pool_mutex. * * Return: * %true if some freezable workqueues are still busy. %false if freezing * is complete. */ bool freeze_workqueues_busy(void) { bool busy = false; struct workqueue_struct *wq; struct pool_workqueue *pwq; mutex_lock(&wq_pool_mutex); WARN_ON_ONCE(!workqueue_freezing); list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_FREEZABLE)) continue; /* * nr_active is monotonically decreasing. It's safe * to peek without lock. */ rcu_read_lock(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; rcu_read_unlock(); goto out_unlock; } } rcu_read_unlock(); } out_unlock: mutex_unlock(&wq_pool_mutex); return busy; } /** * thaw_workqueues - thaw workqueues * * Thaw workqueues. Normal queueing is restored and all collected * frozen works are transferred to their respective pool worklists. * * CONTEXT: * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. */ void thaw_workqueues(void) { struct workqueue_struct *wq; mutex_lock(&wq_pool_mutex); if (!workqueue_freezing) goto out_unlock; workqueue_freezing = false; /* restore max_active and repopulate worklist */ list_for_each_entry(wq, &workqueues, list) { mutex_lock(&wq->mutex); wq_adjust_max_active(wq); mutex_unlock(&wq->mutex); } out_unlock: mutex_unlock(&wq_pool_mutex); } #endif /* CONFIG_FREEZER */ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) { LIST_HEAD(ctxs); int ret = 0; struct workqueue_struct *wq; struct apply_wqattrs_ctx *ctx, *n; lockdep_assert_held(&wq_pool_mutex); list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING)) continue; ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); break; } list_add_tail(&ctx->list, &ctxs); } list_for_each_entry_safe(ctx, n, &ctxs, list) { if (!ret) apply_wqattrs_commit(ctx); apply_wqattrs_cleanup(ctx); } if (!ret) { 
mutex_lock(&wq_pool_attach_mutex); cpumask_copy(wq_unbound_cpumask, unbound_cpumask); mutex_unlock(&wq_pool_attach_mutex); } return ret; } /** * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask * @exclude_cpumask: the cpumask to be excluded from wq_unbound_cpumask * * This function can be called from cpuset code to provide a set of isolated * CPUs that should be excluded from wq_unbound_cpumask. */ int workqueue_unbound_exclude_cpumask(cpumask_var_t exclude_cpumask) { cpumask_var_t cpumask; int ret = 0; if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) return -ENOMEM; mutex_lock(&wq_pool_mutex); /* * If the operation fails, it will fall back to * wq_requested_unbound_cpumask which is initially set to * (HK_TYPE_WQ ∩ HK_TYPE_DOMAIN) house keeping mask and rewritten * by any subsequent write to workqueue/cpumask sysfs file. */ if (!cpumask_andnot(cpumask, wq_requested_unbound_cpumask, exclude_cpumask)) cpumask_copy(cpumask, wq_requested_unbound_cpumask); if (!cpumask_equal(cpumask, wq_unbound_cpumask)) ret = workqueue_apply_unbound_cpumask(cpumask); /* Save the current isolated cpumask & export it via sysfs */ if (!ret) cpumask_copy(wq_isolated_cpumask, exclude_cpumask); mutex_unlock(&wq_pool_mutex); free_cpumask_var(cpumask); return ret; } static int parse_affn_scope(const char *val) { int i; for (i = 0; i < ARRAY_SIZE(wq_affn_names); i++) { if (!strncasecmp(val, wq_affn_names[i], strlen(wq_affn_names[i]))) return i; } return -EINVAL; } static int wq_affn_dfl_set(const char *val, const struct kernel_param *kp) { struct workqueue_struct *wq; int affn, cpu; affn = parse_affn_scope(val); if (affn < 0) return affn; if (affn == WQ_AFFN_DFL) return -EINVAL; cpus_read_lock(); mutex_lock(&wq_pool_mutex); wq_affn_dfl = affn; list_for_each_entry(wq, &workqueues, list) { for_each_online_cpu(cpu) unbound_wq_update_pwq(wq, cpu); } mutex_unlock(&wq_pool_mutex); cpus_read_unlock(); return 0; } static int wq_affn_dfl_get(char *buffer, const struct 
kernel_param *kp) { return scnprintf(buffer, PAGE_SIZE, "%s\n", wq_affn_names[wq_affn_dfl]); } static const struct kernel_param_ops wq_affn_dfl_ops = { .set = wq_affn_dfl_set, .get = wq_affn_dfl_get, }; module_param_cb(default_affinity_scope, &wq_affn_dfl_ops, NULL, 0644); #ifdef CONFIG_SYSFS /* * Workqueues with WQ_SYSFS flag set is visible to userland via * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the * following attributes. * * per_cpu RO bool : whether the workqueue is per-cpu or unbound * max_active RW int : maximum number of in-flight work items * * Unbound workqueues have the following extra attributes. * * nice RW int : nice value of the workers * cpumask RW mask : bitmask of allowed CPUs for the workers * affinity_scope RW str : worker CPU affinity scope (cache, numa, none) * affinity_strict RW bool : worker CPU affinity is strict */ struct wq_device { struct workqueue_struct *wq; struct device dev; }; static struct workqueue_struct *dev_to_wq(struct device *dev) { struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); return wq_dev->wq; } static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); } static DEVICE_ATTR_RO(per_cpu); static ssize_t max_active_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); } static ssize_t max_active_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); int val; if (sscanf(buf, "%d", &val) != 1 || val <= 0) return -EINVAL; workqueue_set_max_active(wq, val); return count; } static DEVICE_ATTR_RW(max_active); static struct attribute *wq_sysfs_attrs[] = { &dev_attr_per_cpu.attr, &dev_attr_max_active.attr, NULL, }; 
ATTRIBUTE_GROUPS(wq_sysfs); static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); mutex_unlock(&wq->mutex); return written; } /* prepare workqueue_attrs for sysfs store operations */ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) { struct workqueue_attrs *attrs; lockdep_assert_held(&wq_pool_mutex); attrs = alloc_workqueue_attrs(); if (!attrs) return NULL; copy_workqueue_attrs(attrs, wq->unbound_attrs); return attrs; } static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int ret = -ENOMEM; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) goto out_unlock; if (sscanf(buf, "%d", &attrs->nice) == 1 && attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) ret = apply_workqueue_attrs_locked(wq, attrs); else ret = -EINVAL; out_unlock: apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static ssize_t wq_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); written = scnprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(wq->unbound_attrs->cpumask)); mutex_unlock(&wq->mutex); return written; } static ssize_t wq_cpumask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int ret = -ENOMEM; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) goto out_unlock; ret = cpumask_parse(buf, attrs->cpumask); if (!ret) ret = apply_workqueue_attrs_locked(wq, attrs); out_unlock: apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static 
ssize_t wq_affn_scope_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); int written; mutex_lock(&wq->mutex); if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL) written = scnprintf(buf, PAGE_SIZE, "%s (%s)\n", wq_affn_names[WQ_AFFN_DFL], wq_affn_names[wq_affn_dfl]); else written = scnprintf(buf, PAGE_SIZE, "%s\n", wq_affn_names[wq->unbound_attrs->affn_scope]); mutex_unlock(&wq->mutex); return written; } static ssize_t wq_affn_scope_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int affn, ret = -ENOMEM; affn = parse_affn_scope(buf); if (affn < 0) return affn; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (attrs) { attrs->affn_scope = affn; ret = apply_workqueue_attrs_locked(wq, attrs); } apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static ssize_t wq_affinity_strict_show(struct device *dev, struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); return scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->affn_strict); } static ssize_t wq_affinity_strict_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; int v, ret = -ENOMEM; if (sscanf(buf, "%d", &v) != 1) return -EINVAL; apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (attrs) { attrs->affn_strict = (bool)v; ret = apply_workqueue_attrs_locked(wq, attrs); } apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } static struct device_attribute wq_sysfs_unbound_attrs[] = { __ATTR(nice, 0644, wq_nice_show, wq_nice_store), __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), __ATTR(affinity_scope, 0644, wq_affn_scope_show, wq_affn_scope_store), __ATTR(affinity_strict, 0644, wq_affinity_strict_show, 
wq_affinity_strict_store), __ATTR_NULL, }; static const struct bus_type wq_subsys = { .name = "workqueue", .dev_groups = wq_sysfs_groups, }; /** * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask * @cpumask: the cpumask to set * * The low-level workqueues cpumask is a global cpumask that limits * the affinity of all unbound workqueues. This function check the @cpumask * and apply it to all unbound workqueues and updates all pwqs of them. * * Return: 0 - Success * -EINVAL - Invalid @cpumask * -ENOMEM - Failed to allocate memory for attrs or pwqs. */ static int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) { int ret = -EINVAL; /* * Not excluding isolated cpus on purpose. * If the user wishes to include them, we allow that. */ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { ret = 0; apply_wqattrs_lock(); if (!cpumask_equal(cpumask, wq_unbound_cpumask)) ret = workqueue_apply_unbound_cpumask(cpumask); if (!ret) cpumask_copy(wq_requested_unbound_cpumask, cpumask); apply_wqattrs_unlock(); } return ret; } static ssize_t __wq_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf, cpumask_var_t mask) { int written; mutex_lock(&wq_pool_mutex); written = scnprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); mutex_unlock(&wq_pool_mutex); return written; } static ssize_t cpumask_requested_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_requested_unbound_cpumask); } static DEVICE_ATTR_RO(cpumask_requested); static ssize_t cpumask_isolated_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_isolated_cpumask); } static DEVICE_ATTR_RO(cpumask_isolated); static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { return __wq_cpumask_show(dev, attr, buf, wq_unbound_cpumask); } static ssize_t cpumask_store(struct device *dev, struct device_attribute 
*attr, const char *buf, size_t count) { cpumask_var_t cpumask; int ret; if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) return -ENOMEM; ret = cpumask_parse(buf, cpumask); if (!ret) ret = workqueue_set_unbound_cpumask(cpumask); free_cpumask_var(cpumask); return ret ? ret : count; } static DEVICE_ATTR_RW(cpumask); static struct attribute *wq_sysfs_cpumask_attrs[] = { &dev_attr_cpumask.attr, &dev_attr_cpumask_requested.attr, &dev_attr_cpumask_isolated.attr, NULL, }; ATTRIBUTE_GROUPS(wq_sysfs_cpumask); static int __init wq_sysfs_init(void) { return subsys_virtual_register(&wq_subsys, wq_sysfs_cpumask_groups); } core_initcall(wq_sysfs_init); static void wq_device_release(struct device *dev) { struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); kfree(wq_dev); } /** * workqueue_sysfs_register - make a workqueue visible in sysfs * @wq: the workqueue to register * * Expose @wq in sysfs under /sys/bus/workqueue/devices. * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set * which is the preferred method. * * Workqueue user should use this function directly iff it wants to apply * workqueue_attrs before making the workqueue visible in sysfs; otherwise, * apply_workqueue_attrs() may race against userland updating the * attributes. * * Return: 0 on success, -errno on failure. */ int workqueue_sysfs_register(struct workqueue_struct *wq) { struct wq_device *wq_dev; int ret; /* * Adjusting max_active breaks ordering guarantee. Disallow exposing * ordered workqueues. */ if (WARN_ON(wq->flags & __WQ_ORDERED)) return -EINVAL; wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); if (!wq_dev) return -ENOMEM; wq_dev->wq = wq; wq_dev->dev.bus = &wq_subsys; wq_dev->dev.release = wq_device_release; dev_set_name(&wq_dev->dev, "%s", wq->name); /* * unbound_attrs are created separately. Suppress uevent until * everything is ready. 
*/ dev_set_uevent_suppress(&wq_dev->dev, true); ret = device_register(&wq_dev->dev); if (ret) { put_device(&wq_dev->dev); wq->wq_dev = NULL; return ret; } if (wq->flags & WQ_UNBOUND) { struct device_attribute *attr; for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { ret = device_create_file(&wq_dev->dev, attr); if (ret) { device_unregister(&wq_dev->dev); wq->wq_dev = NULL; return ret; } } } dev_set_uevent_suppress(&wq_dev->dev, false); kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); return 0; } /** * workqueue_sysfs_unregister - undo workqueue_sysfs_register() * @wq: the workqueue to unregister * * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. */ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { struct wq_device *wq_dev = wq->wq_dev; if (!wq->wq_dev) return; wq->wq_dev = NULL; device_unregister(&wq_dev->dev); } #else /* CONFIG_SYSFS */ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } #endif /* CONFIG_SYSFS */ /* * Workqueue watchdog. * * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal * flush dependency, a concurrency managed work item which stays RUNNING * indefinitely. Workqueue stalls can be very difficult to debug as the * usual warning mechanisms don't trigger and internal workqueue state is * largely opaque. * * Workqueue watchdog monitors all worker pools periodically and dumps * state if some pools failed to make forward progress for a while where * forward progress is defined as the first item on ->worklist changing. * * This mechanism is controlled through the kernel parameter * "workqueue.watchdog_thresh" which can be updated at runtime through the * corresponding sysfs parameter file. 
*/ #ifdef CONFIG_WQ_WATCHDOG static unsigned long wq_watchdog_thresh = 30; static struct timer_list wq_watchdog_timer; static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; static unsigned int wq_panic_on_stall; module_param_named(panic_on_stall, wq_panic_on_stall, uint, 0644); /* * Show workers that might prevent the processing of pending work items. * The only candidates are CPU-bound workers in the running state. * Pending work items should be handled by another idle worker * in all other situations. */ static void show_cpu_pool_hog(struct worker_pool *pool) { struct worker *worker; unsigned long irq_flags; int bkt; raw_spin_lock_irqsave(&pool->lock, irq_flags); hash_for_each(pool->busy_hash, bkt, worker, hentry) { if (task_is_running(worker->task)) { /* * Defer printing to avoid deadlocks in console * drivers that queue work while holding locks * also taken in their write paths. */ printk_deferred_enter(); pr_info("pool %d:\n", pool->id); sched_show_task(worker->task); printk_deferred_exit(); } } raw_spin_unlock_irqrestore(&pool->lock, irq_flags); } static void show_cpu_pools_hogs(void) { struct worker_pool *pool; int pi; pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); rcu_read_lock(); for_each_pool(pool, pi) { if (pool->cpu_stall) show_cpu_pool_hog(pool); } rcu_read_unlock(); } static void panic_on_wq_watchdog(void) { static unsigned int wq_stall; if (wq_panic_on_stall) { wq_stall++; BUG_ON(wq_stall >= wq_panic_on_stall); } } static void wq_watchdog_reset_touched(void) { int cpu; wq_watchdog_touched = jiffies; for_each_possible_cpu(cpu) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; } static void wq_watchdog_timer_fn(struct timer_list *unused) { unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; bool lockup_detected = false; bool cpu_pool_stall = false; unsigned long now = jiffies; struct worker_pool *pool; int pi; if 
(!thresh) return; rcu_read_lock(); for_each_pool(pool, pi) { unsigned long pool_ts, touched, ts; pool->cpu_stall = false; if (list_empty(&pool->worklist)) continue; /* * If a virtual machine is stopped by the host it can look to * the watchdog like a stall. */ kvm_check_and_clear_guest_paused(); /* get the latest of pool and touched timestamps */ if (pool->cpu >= 0) touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); else touched = READ_ONCE(wq_watchdog_touched); pool_ts = READ_ONCE(pool->watchdog_ts); if (time_after(pool_ts, touched)) ts = pool_ts; else ts = touched; /* did we stall? */ if (time_after(now, ts + thresh)) { lockup_detected = true; if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) { pool->cpu_stall = true; cpu_pool_stall = true; } pr_emerg("BUG: workqueue lockup - pool"); pr_cont_pool_info(pool); pr_cont(" stuck for %us!\n", jiffies_to_msecs(now - pool_ts) / 1000); } } rcu_read_unlock(); if (lockup_detected) show_all_workqueues(); if (cpu_pool_stall) show_cpu_pools_hogs(); if (lockup_detected) panic_on_wq_watchdog(); wq_watchdog_reset_touched(); mod_timer(&wq_watchdog_timer, jiffies + thresh); } notrace void wq_watchdog_touch(int cpu) { unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; unsigned long touch_ts = READ_ONCE(wq_watchdog_touched); unsigned long now = jiffies; if (cpu >= 0) per_cpu(wq_watchdog_touched_cpu, cpu) = now; else WARN_ONCE(1, "%s should be called with valid CPU", __func__); /* Don't unnecessarily store to global cacheline */ if (time_after(now, touch_ts + thresh / 4)) WRITE_ONCE(wq_watchdog_touched, jiffies); } static void wq_watchdog_set_thresh(unsigned long thresh) { wq_watchdog_thresh = 0; timer_delete_sync(&wq_watchdog_timer); if (thresh) { wq_watchdog_thresh = thresh; wq_watchdog_reset_touched(); mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ); } } static int wq_watchdog_param_set_thresh(const char *val, const struct kernel_param *kp) { unsigned long thresh; int ret; ret = kstrtoul(val, 0, &thresh); 
if (ret) return ret; if (system_wq) wq_watchdog_set_thresh(thresh); else wq_watchdog_thresh = thresh; return 0; } static const struct kernel_param_ops wq_watchdog_thresh_ops = { .set = wq_watchdog_param_set_thresh, .get = param_get_ulong, }; module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh, 0644); static void wq_watchdog_init(void) { timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE); wq_watchdog_set_thresh(wq_watchdog_thresh); } #else /* CONFIG_WQ_WATCHDOG */ static inline void wq_watchdog_init(void) { } #endif /* CONFIG_WQ_WATCHDOG */ static void bh_pool_kick_normal(struct irq_work *irq_work) { raise_softirq_irqoff(TASKLET_SOFTIRQ); } static void bh_pool_kick_highpri(struct irq_work *irq_work) { raise_softirq_irqoff(HI_SOFTIRQ); } static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) { if (!cpumask_intersects(wq_unbound_cpumask, mask)) { pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); return; } cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask); } static void __init init_cpu_worker_pool(struct worker_pool *pool, int cpu, int nice) { BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu)); pool->attrs->nice = nice; pool->attrs->affn_strict = true; pool->node = cpu_to_node(cpu); /* alloc pool ID */ mutex_lock(&wq_pool_mutex); BUG_ON(worker_pool_assign_id(pool)); mutex_unlock(&wq_pool_mutex); } /** * workqueue_init_early - early init for workqueue subsystem * * This is the first step of three-staged workqueue subsystem initialization and * invoked as soon as the bare basics - memory allocation, cpumasks and idr are * up. It sets up all the data structures and system workqueues and allows early * boot code to create workqueues and queue/cancel work items. 
Actual work item * execution starts only after kthreads can be created and scheduled right * before early initcalls. */ void __init workqueue_init_early(void) { struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM]; int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; void (*irq_work_fns[2])(struct irq_work *) = { bh_pool_kick_normal, bh_pool_kick_highpri }; int i, cpu; BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_online_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&wq_requested_unbound_cpumask, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&wq_isolated_cpumask, GFP_KERNEL)); cpumask_copy(wq_online_cpumask, cpu_online_mask); cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ)); restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN)); if (!cpumask_empty(&wq_cmdline_cpumask)) restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask); cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs(); BUG_ON(!unbound_wq_update_pwq_attrs_buf); /* * If nohz_full is enabled, set power efficient workqueue as unbound. * This allows workqueue items to be moved to HK CPUs. 
*/ if (housekeeping_enabled(HK_TYPE_TICK)) wq_power_efficient = true; /* initialize WQ_AFFN_SYSTEM pods */ pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); pt->nr_pods = 1; cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); pt->pod_node[0] = NUMA_NO_NODE; pt->cpu_pod[0] = 0; /* initialize BH and CPU pools */ for_each_possible_cpu(cpu) { struct worker_pool *pool; i = 0; for_each_bh_worker_pool(pool, cpu) { init_cpu_worker_pool(pool, cpu, std_nice[i]); pool->flags |= POOL_BH; init_irq_work(bh_pool_irq_work(pool), irq_work_fns[i]); i++; } i = 0; for_each_cpu_worker_pool(pool, cpu) init_cpu_worker_pool(pool, cpu, std_nice[i++]); } /* create default unbound and ordered wq attrs */ for (i = 0; i < NR_STD_WORKER_POOLS; i++) { struct workqueue_attrs *attrs; BUG_ON(!(attrs = alloc_workqueue_attrs())); attrs->nice = std_nice[i]; unbound_std_wq_attrs[i] = attrs; /* * An ordered wq should have only one pwq as ordering is * guaranteed by max_active which is enforced by pwqs. 
*/
		BUG_ON(!(attrs = alloc_workqueue_attrs()));
		attrs->nice = std_nice[i];
		attrs->ordered = true;
		ordered_wq_attrs[i] = attrs;
	}

	system_wq = alloc_workqueue("events", 0, 0);
	system_percpu_wq = alloc_workqueue("events", 0, 0);
	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
	system_long_wq = alloc_workqueue("events_long", 0, 0);
	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
	system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
	system_freezable_wq = alloc_workqueue("events_freezable",
					      WQ_FREEZABLE, 0);
	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
					      WQ_POWER_EFFICIENT, 0);
	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
					      WQ_FREEZABLE | WQ_POWER_EFFICIENT,
					      0);
	system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
	system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
					       WQ_BH | WQ_HIGHPRI, 0);
	/* every system workqueue allocated above is mandatory */
	BUG_ON(!system_wq || !system_percpu_wq|| !system_highpri_wq ||
	       !system_long_wq || !system_unbound_wq || !system_freezable_wq ||
	       !system_dfl_wq || !system_power_efficient_wq ||
	       !system_freezable_power_efficient_wq ||
	       !system_bh_wq || !system_bh_highpri_wq);
}

/*
 * Start the "pool_workqueue_release" kthread worker and, unless the user
 * already chose a value, pick the default for wq_cpu_intensive_thresh_us.
 * ULONG_MAX is the "not set by the user" sentinel tested below.
 */
static void __init wq_cpu_intensive_thresh_init(void)
{
	unsigned long thresh;
	unsigned long bogo;

	pwq_release_worker = kthread_run_worker(0, "pool_workqueue_release");
	BUG_ON(IS_ERR(pwq_release_worker));

	/* if the user set it to a specific value, keep it */
	if (wq_cpu_intensive_thresh_us != ULONG_MAX)
		return;

	/*
	 * The default of 10ms is derived from the fact that most modern (as of
	 * 2023) processors can do a lot in 10ms and that it's just below what
	 * most consider human-perceivable. However, the kernel also runs on a
	 * lot slower CPUs including microcontrollers where the threshold is way
	 * too low.
	 *
	 * Let's scale up the threshold up to 1 second if BogoMips is below 4000.
	 * This is by no means accurate but it doesn't have to be. The mechanism
	 * is still useful even when the threshold is fully scaled up. Also, as
	 * the reports would usually be applicable to everyone, some machines
	 * operating on longer thresholds won't significantly diminish their
	 * usefulness.
	 */
	thresh = 10 * USEC_PER_MSEC;

	/* see init/calibrate.c for lpj -> BogoMIPS calculation */
	/* clamped to at least 1 so the division below never divides by zero */
	bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1);
	if (bogo < 4000)
		thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC);

	pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
		 loops_per_jiffy, bogo, thresh);

	wq_cpu_intensive_thresh_us = thresh;
}

/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the second step of three-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled. Workqueues have
 * been created and work items queued on them, but there are no kworkers
 * executing the work items yet. Populate the worker pools with the initial
 * workers and enable future kworker creations.
 */
void __init workqueue_init(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	int cpu, bkt;

	wq_cpu_intensive_thresh_init();

	mutex_lock(&wq_pool_mutex);

	/*
	 * Per-cpu pools created earlier could be missing node hint. Fix them
	 * up. Also, create a rescuer for workqueues that requested it.
	 */
	for_each_possible_cpu(cpu) {
		for_each_bh_worker_pool(pool, cpu)
			pool->node = cpu_to_node(cpu);
		for_each_cpu_worker_pool(pool, cpu)
			pool->node = cpu_to_node(cpu);
	}

	/* a failed rescuer creation only warns; initialization carries on */
	list_for_each_entry(wq, &workqueues, list) {
		WARN(init_rescuer(wq),
		     "workqueue: failed to create early rescuer for %s",
		     wq->name);
	}

	mutex_unlock(&wq_pool_mutex);

	/*
	 * Create the initial workers. A BH pool has one pseudo worker that
	 * represents the shared BH execution context and thus doesn't get
	 * affected by hotplug events. Create the BH pseudo workers for all
	 * possible CPUs here.
	 */
	for_each_possible_cpu(cpu)
		for_each_bh_worker_pool(pool, cpu)
			BUG_ON(!create_worker(pool));

	/* per-cpu pools of online CPUs: clear POOL_DISASSOCIATED, then populate */
	for_each_online_cpu(cpu) {
		for_each_cpu_worker_pool(pool, cpu) {
			pool->flags &= ~POOL_DISASSOCIATED;
			BUG_ON(!create_worker(pool));
		}
	}

	/* every unbound pool currently hashed gets its first worker as well */
	hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
		BUG_ON(!create_worker(pool));

	/* mark the subsystem online only after all initial workers exist */
	wq_online = true;
	wq_watchdog_init();
}

/*
 * Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to
 * @cpus_share_pod(). Each subset of CPUs that share a pod is assigned a unique
 * and consecutive pod ID. The rest of @pt is initialized accordingly.
 */
static void __init init_pod_type(struct wq_pod_type *pt,
				 bool (*cpus_share_pod)(int, int))
{
	int cur, pre, cpu, pod;

	pt->nr_pods = 0;

	/* init @pt->cpu_pod[] according to @cpus_share_pod() */
	pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL);
	BUG_ON(!pt->cpu_pod);

	for_each_possible_cpu(cur) {
		for_each_possible_cpu(pre) {
			/*
			 * Reaching @cur (or beyond) means no CPU visited before
			 * it shares a pod with it: open a new pod.
			 */
			if (pre >= cur) {
				pt->cpu_pod[cur] = pt->nr_pods++;
				break;
			}
			/* otherwise join the pod of the first sharing CPU found */
			if (cpus_share_pod(cur, pre)) {
				pt->cpu_pod[cur] = pt->cpu_pod[pre];
				break;
			}
		}
	}

	/* init the rest to match @pt->cpu_pod[] */
	pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[0]), GFP_KERNEL);
	pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[0]), GFP_KERNEL);
	BUG_ON(!pt->pod_cpus || !pt->pod_node);

	for (pod = 0; pod < pt->nr_pods; pod++)
		BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL));

	/*
	 * pod_node[] is overwritten for every CPU in the pod, so it ends up
	 * holding the node of the last CPU visited for that pod.
	 */
	for_each_possible_cpu(cpu) {
		cpumask_set_cpu(cpu, pt->pod_cpus[pt->cpu_pod[cpu]]);
		pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu);
	}
}

/* Pod predicate used for WQ_AFFN_CPU: no two CPUs ever share a pod. */
static bool __init cpus_dont_share(int cpu0, int cpu1)
{
	return false;
}

/*
 * Pod predicate used for WQ_AFFN_SMT: true when @cpu0 is set in @cpu1's SMT
 * mask. Always false when CONFIG_SCHED_SMT is not enabled.
 */
static bool __init cpus_share_smt(int cpu0, int cpu1)
{
#ifdef CONFIG_SCHED_SMT
	return cpumask_test_cpu(cpu0, cpu_smt_mask(cpu1));
#else
	return false;
#endif
}

/* Pod predicate used for WQ_AFFN_NUMA: true when both CPUs map to the same node. */
static bool __init cpus_share_numa(int cpu0, int cpu1)
{
	return cpu_to_node(cpu0) == cpu_to_node(cpu1);
}

/**
 * workqueue_init_topology - initialize CPU pods for unbound workqueues
 *
 * This is the third step of three-staged workqueue subsystem initialization and
 * invoked after SMP and topology information are fully initialized. It
 * initializes the unbound CPU pods accordingly.
 */
void __init workqueue_init_topology(void)
{
	struct workqueue_struct *wq;
	int cpu;

	init_pod_type(&wq_pod_types[WQ_AFFN_CPU], cpus_dont_share);
	init_pod_type(&wq_pod_types[WQ_AFFN_SMT], cpus_share_smt);
	init_pod_type(&wq_pod_types[WQ_AFFN_CACHE], cpus_share_cache);
	init_pod_type(&wq_pod_types[WQ_AFFN_NUMA], cpus_share_numa);

	wq_topo_initialized = true;

	mutex_lock(&wq_pool_mutex);

	/*
	 * Workqueues allocated earlier would have all CPUs sharing the default
	 * worker pool. Explicitly call unbound_wq_update_pwq() on all workqueue
	 * and CPU combinations to apply per-pod sharing.
	 */
	list_for_each_entry(wq, &workqueues, list) {
		for_each_online_cpu(cpu)
			unbound_wq_update_pwq(wq, cpu);
		/* unbound workqueues also get their node max_active refreshed */
		if (wq->flags & WQ_UNBOUND) {
			mutex_lock(&wq->mutex);
			wq_update_node_max_active(wq, -1);
			mutex_unlock(&wq->mutex);
		}
	}

	mutex_unlock(&wq_pool_mutex);
}

/* Emit the deprecation warning for flushing system-wide workqueues plus a stack dump. */
void __warn_flushing_systemwide_wq(void)
{
	pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in near future.\n");
	dump_stack();
}
EXPORT_SYMBOL(__warn_flushing_systemwide_wq);

/*
 * Handler for the "workqueue.unbound_cpus=" boot parameter: parse @str as a
 * CPU list into wq_cmdline_cpumask. On a parse error the mask is cleared and a
 * warning is printed. Always returns 1.
 */
static int __init workqueue_unbound_cpus_setup(char *str)
{
	if (cpulist_parse(str, &wq_cmdline_cpumask) < 0) {
		cpumask_clear(&wq_cmdline_cpumask);
		pr_warn("workqueue.unbound_cpus: incorrect CPU range, using default\n");
	}

	return 1;
}
__setup("workqueue.unbound_cpus=", workqueue_unbound_cpus_setup);
554 556 558 554 421 311 50 66 538 66 66 11 11 11 7 13 66 335 17 6 14 11 11 11 11 13 4 2 6 7 7 535 235 423 213 117 332 118 423 422 13 13 423 423 538 2576 2580 117 414 413 411 213 215 423 14 14 14 422 421 422 43 44 44 309 312 2 313 312 6 6 30 30 30 2 2 6 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 
963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/devres.c - device resource management * * Copyright (c) 2006 SUSE Linux Products GmbH * Copyright (c) 2006 Tejun Heo <teheo@suse.de> */ #include <linux/device.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/percpu.h> #include <asm/sections.h> #include "base.h" #include "trace.h" struct devres_node { struct list_head entry; dr_release_t release; const char *name; size_t size; }; struct devres { struct devres_node node; /* * Some archs want to perform 
DMA into kmalloc caches
	 * and need a guaranteed alignment larger than
	 * the alignment of a 64-bit integer.
	 * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same
	 * alignment for struct devres when allocated by kmalloc().
	 */
	u8 __aligned(ARCH_DMA_MINALIGN) data[];
};

/*
 * A devres group is represented by two marker nodes on the device's devres
 * list: node[0] carries group_open_release and node[1] carries
 * group_close_release (see node_to_group()).
 */
struct devres_group {
	struct devres_node		node[2];
	/* identifier returned by devres_open_group() and matched by find_group() */
	void				*id;
	/* scratch counter reset and incremented by remove_nodes() */
	int				color;
	/* -- 8 pointers */
};

/* Record the @name and @size that devres_log() reports for @node. */
static void set_node_dbginfo(struct devres_node *node, const char *name,
			     size_t size)
{
	node->name = name;
	node->size = size;
}

#ifdef CONFIG_DEBUG_DEVRES
/* module parameter "log": non-zero makes devres_dbg() print each operation */
static int log_devres = 0;
module_param_named(log, log_devres, int, S_IRUGO | S_IWUSR);

/* Print operation @op on @node through dev_err() when logging is enabled. */
static void devres_dbg(struct device *dev, struct devres_node *node,
		       const char *op)
{
	if (unlikely(log_devres))
		dev_err(dev, "DEVRES %3s %p %s (%zu bytes)\n",
			op, node, node->name, node->size);
}
#else /* CONFIG_DEBUG_DEVRES */
#define devres_dbg(dev, node, op)	do {} while (0)
#endif /* CONFIG_DEBUG_DEVRES */

/* Report operation @op on @node to the devres tracepoint and to devres_dbg(). */
static void devres_log(struct device *dev, struct devres_node *node,
		       const char *op)
{
	trace_devres_log(dev, op, node, node->name, node->size);
	devres_dbg(dev, node, op);
}

/*
 * Release functions for devres group.  These callbacks are used only
 * for identification.
 */
static void group_open_release(struct device *dev, void *res)
{
	/* noop */
}

static void group_close_release(struct device *dev, void *res)
{
	/* noop */
}

/*
 * Map a list node back to its devres_group when it is one of the two group
 * markers, identified purely by the release callback. Returns NULL for a
 * regular devres node.
 */
static struct devres_group *node_to_group(struct devres_node *node)
{
	if (node->release == &group_open_release)
		return container_of(node, struct devres_group, node[0]);
	if (node->release == &group_close_release)
		return container_of(node, struct devres_group, node[1]);
	return NULL;
}

/*
 * Compute the allocation size for a devres carrying @size bytes of payload:
 * sizeof(struct devres) + @size, rounded up by kmalloc_size_roundup().
 * The result is stored in @tot_size. Returns false when the sum overflows.
 */
static bool check_dr_size(size_t size, size_t *tot_size)
{
	/* We must catch any near-SIZE_MAX cases that could overflow. */
	if (unlikely(check_add_overflow(sizeof(struct devres),
					size, tot_size)))
		return false;

	/* Actually allocate the full kmalloc bucket size. */
	*tot_size = kmalloc_size_roundup(*tot_size);

	return true;
}

/*
 * Allocate a devres with @size bytes of payload on NUMA node @nid and bind
 * @release to it. The header (everything before data[]) is always zeroed;
 * the payload is zeroed only when @gfp contains __GFP_ZERO. The node is left
 * unlinked (empty list entry). Returns NULL on size overflow or allocation
 * failure.
 */
static __always_inline struct devres *alloc_dr(dr_release_t release,
					       size_t size, gfp_t gfp, int nid)
{
	size_t tot_size;
	struct devres *dr;

	if (!check_dr_size(size, &tot_size))
		return NULL;

	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
	if (unlikely(!dr))
		return NULL;

	/* No need to clear memory twice */
	if (!(gfp & __GFP_ZERO))
		memset(dr, 0, offsetof(struct devres, data));

	INIT_LIST_HEAD(&dr->node.entry);
	dr->node.release = release;
	return dr;
}

/*
 * Append @node to the tail of @dev's devres list, logging the operation as
 * "ADD". It is a BUG for @node to be linked already. Takes no lock itself;
 * the callers in this file wrap it in dev->devres_lock.
 */
static void add_dr(struct device *dev, struct devres_node *node)
{
	devres_log(dev, node, "ADD");
	BUG_ON(!list_empty(&node->entry));
	list_add_tail(&node->entry, &dev->devres_head);
}

/*
 * Put @new in @old's position on the devres list, logging the operation as
 * "REPLACE". It is a BUG for @new to be linked already. Takes no lock itself.
 */
static void replace_dr(struct device *dev,
		       struct devres_node *old, struct devres_node *new)
{
	devres_log(dev, old, "REPLACE");
	BUG_ON(!list_empty(&new->entry));
	list_replace(&old->entry, &new->entry);
}

/**
 * __devres_alloc_node - Allocate device resource data
 * @release: Release function devres will be associated with
 * @size: Allocation size
 * @gfp: Allocation flags
 * @nid: NUMA node
 * @name: Name of the resource
 *
 * Allocate devres of @size bytes.  The allocated area is zeroed, then
 * associated with @release.  The returned pointer can be passed to
 * other devres_*() functions.
 *
 * RETURNS:
 * Pointer to allocated devres on success, NULL on failure.
 */
void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid,
			  const char *name)
{
	struct devres *dr;

	/* __GFP_ZERO is forced so the payload handed back is always zeroed */
	dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid);
	if (unlikely(!dr))
		return NULL;
	set_node_dbginfo(&dr->node, name, size);
	/* callers receive the payload pointer, not the enclosing struct devres */
	return dr->data;
}
EXPORT_SYMBOL_GPL(__devres_alloc_node);

/**
 * devres_for_each_res - Resource iterator
 * @dev: Device to iterate resource from
 * @release: Look for resources associated with this release function
 * @match: Match function (optional)
 * @match_data: Data for the match function
 * @fn: Function to be called for each matched resource.
* @data: Data for @fn, the 3rd parameter of @fn * * Call @fn for each devres of @dev which is associated with @release * and for which @match returns 1. * * RETURNS: * void */ void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), void *data) { struct devres_node *node; struct devres_node *tmp; unsigned long flags; if (!fn) return; spin_lock_irqsave(&dev->devres_lock, flags); list_for_each_entry_safe_reverse(node, tmp, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; fn(dev, dr->data, data); } spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_for_each_res); /** * devres_free - Free device resource data * @res: Pointer to devres data to free * * Free devres created with devres_alloc(). */ void devres_free(void *res) { if (res) { struct devres *dr = container_of(res, struct devres, data); BUG_ON(!list_empty(&dr->node.entry)); kfree(dr); } } EXPORT_SYMBOL_GPL(devres_free); /** * devres_add - Register device resource * @dev: Device to add resource to * @res: Resource to register * * Register devres @res to @dev. @res should have been allocated * using devres_alloc(). On driver detach, the associated release * function will be invoked and devres will be freed automatically. 
*/ void devres_add(struct device *dev, void *res) { struct devres *dr = container_of(res, struct devres, data); unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &dr->node); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_add); static struct devres *find_dr(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); if (node->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; return dr; } return NULL; } /** * devres_find - Find device resource * @dev: Device to lookup resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which is associated with @release * and for which @match returns 1. If @match is NULL, it's considered * to match all. * * RETURNS: * Pointer to found devres, NULL if not found. */ void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_find); /** * devres_get - Find devres, if non-existent, add one atomically * @dev: Device to lookup or add devres for * @new_res: Pointer to new initialized devres to add if not found * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev which has the same release function * as @new_res and for which @match return 1. If found, @new_res is * freed; otherwise, @new_res is added atomically. * * RETURNS: * Pointer to found or added devres. 
*/ void *devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data) { struct devres *new_dr = container_of(new_res, struct devres, data); struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, new_dr->node.release, match, match_data); if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; new_res = NULL; } spin_unlock_irqrestore(&dev->devres_lock, flags); devres_free(new_res); return dr->data; } EXPORT_SYMBOL_GPL(devres_get); /** * devres_remove - Find a device resource and remove it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and * returned. * * RETURNS: * Pointer to removed devres on success, NULL if not found. */ void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); dr = find_dr(dev, release, match, match_data); if (dr) { list_del_init(&dr->node.entry); devres_log(dev, &dr->node, "REM"); } spin_unlock_irqrestore(&dev->devres_lock, flags); if (dr) return dr->data; return NULL; } EXPORT_SYMBOL_GPL(devres_remove); /** * devres_destroy - Find a device resource and destroy it * @dev: Device to find resource from * @release: Look for resources associated with this release function * @match: Match function (optional) * @match_data: Data for the match function * * Find the latest devres of @dev associated with @release and for * which @match returns 1. If @match is NULL, it's considered to * match all. If found, the resource is removed atomically and freed. 
*
 * Note that the release function for the resource will not be called,
 * only the devres-allocated data will be freed.  The caller becomes
 * responsible for freeing any other data.
 *
 * RETURNS:
 * 0 if devres is found and freed, -ENOENT if not found.
 */
int devres_destroy(struct device *dev, dr_release_t release,
		   dr_match_t match, void *match_data)
{
	void *res;

	res = devres_remove(dev, release, match, match_data);
	if (unlikely(!res))
		return -ENOENT;

	/* only the devres memory is freed; @release is deliberately not run */
	devres_free(res);
	return 0;
}
EXPORT_SYMBOL_GPL(devres_destroy);


/**
 * devres_release - Find a device resource and destroy it, calling release
 * @dev: Device to find resource from
 * @release: Look for resources associated with this release function
 * @match: Match function (optional)
 * @match_data: Data for the match function
 *
 * Find the latest devres of @dev associated with @release and for
 * which @match returns 1.  If @match is NULL, it's considered to
 * match all.  If found, the resource is removed atomically, the
 * release function called and the resource freed.
 *
 * RETURNS:
 * 0 if devres is found and freed, -ENOENT if not found.
 */
int devres_release(struct device *dev, dr_release_t release,
		   dr_match_t match, void *match_data)
{
	void *res;

	res = devres_remove(dev, release, match, match_data);
	if (unlikely(!res))
		return -ENOENT;

	/* the entry is already unlinked, so @release runs outside the devres lock */
	(*release)(dev, res);
	devres_free(res);
	return 0;
}
EXPORT_SYMBOL_GPL(devres_release);

/*
 * Detach the devres entries in the list range [@first, @end) and collect
 * them on @todo for release_nodes(). Regular entries are always moved;
 * group markers are moved/unlinked only for groups wholly contained in the
 * range (see the second pass). @dev is not referenced by the body. The
 * callers in this file invoke it with dev->devres_lock held.
 *
 * Returns the number of regular (non-group) entries moved to @todo.
 */
static int remove_nodes(struct device *dev,
			struct list_head *first, struct list_head *end,
			struct list_head *todo)
{
	struct devres_node *node, *n;
	int cnt = 0, nr_groups = 0;

	/* First pass - move normal devres entries to @todo and clear
	 * devres_group colors.
	 */
	node = list_entry(first, struct devres_node, entry);
	list_for_each_entry_safe_from(node, n, end, entry) {
		struct devres_group *grp;

		grp = node_to_group(node);
		if (grp) {
			/* clear color of group markers in the first pass */
			grp->color = 0;
			nr_groups++;
		} else {
			/* regular devres entry */
			/*
			 * @first is reused as the start of the second pass, so
			 * advance it before its node leaves the list.
			 */
			if (&node->entry == first)
				first = first->next;
			list_move_tail(&node->entry, todo);
			cnt++;
		}
	}

	/* no group markers in range: the second pass has nothing to do */
	if (!nr_groups)
		return cnt;

	/* Second pass - Scan groups and color them.  A group gets
	 * color value of two iff the group is wholly contained in
	 * [current node, end). That is, for a closed group, both opening
	 * and closing markers should be in the range, while just the
	 * opening marker is enough for an open group.
	 */
	node = list_entry(first, struct devres_node, entry);
	list_for_each_entry_safe_from(node, n, end, entry) {
		struct devres_group *grp;

		/* only group markers can remain in the range after the first pass */
		grp = node_to_group(node);
		BUG_ON(!grp || list_empty(&grp->node[0].entry));

		/* one count per marker visited ... */
		grp->color++;
		/* ... plus one right away when the closing marker is not linked */
		if (list_empty(&grp->node[1].entry))
			grp->color++;

		BUG_ON(grp->color <= 0 || grp->color > 2);
		if (grp->color == 2) {
			/* No need to update current node or end.  The removed
			 * nodes are always before both.
			 */
			list_move_tail(&grp->node[0].entry, todo);
			list_del_init(&grp->node[1].entry);
		}
	}

	return cnt;
}

/*
 * Run the release callback of, and then free, every entry collected on
 * @todo, walking the list from its tail. Each entry is logged as "REL".
 */
static void release_nodes(struct device *dev, struct list_head *todo)
{
	struct devres *dr, *tmp;

	/* Release.  Note that both devres and devres_group are
	 * handled as devres in the following loop.  This is safe.
	 */
	list_for_each_entry_safe_reverse(dr, tmp, todo, node.entry) {
		devres_log(dev, &dr->node, "REL");
		dr->node.release(dev, dr->data);
		kfree(dr);
	}
}

/**
 * devres_release_all - Release all managed resources
 * @dev: Device to release resources for
 *
 * Release all resources associated with @dev.  This function is
 * called on driver detach.
*/ int devres_release_all(struct device *dev) { unsigned long flags; LIST_HEAD(todo); int cnt; /* Looks like an uninitialized device structure */ if (WARN_ON(dev->devres_head.next == NULL)) return -ENODEV; /* Nothing to release if list is empty */ if (list_empty(&dev->devres_head)) return 0; spin_lock_irqsave(&dev->devres_lock, flags); cnt = remove_nodes(dev, dev->devres_head.next, &dev->devres_head, &todo); spin_unlock_irqrestore(&dev->devres_lock, flags); release_nodes(dev, &todo); return cnt; } /** * devres_open_group - Open a new devres group * @dev: Device to open devres group for * @id: Separator ID * @gfp: Allocation flags * * Open a new devres group for @dev with @id. For @id, using a * pointer to an object which won't be used for another group is * recommended. If @id is NULL, address-wise unique ID is created. * * RETURNS: * ID of the new group, NULL on failure. */ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) { struct devres_group *grp; unsigned long flags; grp = kmalloc(sizeof(*grp), gfp); if (unlikely(!grp)) return NULL; grp->node[0].release = &group_open_release; grp->node[1].release = &group_close_release; INIT_LIST_HEAD(&grp->node[0].entry); INIT_LIST_HEAD(&grp->node[1].entry); set_node_dbginfo(&grp->node[0], "grp<", 0); set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; if (id) grp->id = id; grp->color = 0; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &grp->node[0]); spin_unlock_irqrestore(&dev->devres_lock, flags); return grp->id; } EXPORT_SYMBOL_GPL(devres_open_group); /* * Find devres group with ID @id. If @id is NULL, look for the latest open * group. 
*/ static struct devres_group *find_group(struct device *dev, void *id) { struct devres_node *node; list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres_group *grp; if (node->release != &group_open_release) continue; grp = container_of(node, struct devres_group, node[0]); if (id) { if (grp->id == id) return grp; } else if (list_empty(&grp->node[1].entry)) return grp; } return NULL; } /** * devres_close_group - Close a devres group * @dev: Device to close devres group for * @id: ID of target group, can be NULL * * Close the group identified by @id. If @id is NULL, the latest open * group is selected. */ void devres_close_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) add_dr(dev, &grp->node[1]); else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_close_group); /** * devres_remove_group - Remove a devres group * @dev: Device to remove group for * @id: ID of target group, can be NULL * * Remove the group identified by @id. If @id is NULL, the latest * open group is selected. Note that removing a group doesn't affect * any other resources. */ void devres_remove_group(struct device *dev, void *id) { struct devres_group *grp; unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); grp = find_group(dev, id); if (grp) { list_del_init(&grp->node[0].entry); list_del_init(&grp->node[1].entry); devres_log(dev, &grp->node[0], "REM"); } else WARN_ON(1); spin_unlock_irqrestore(&dev->devres_lock, flags); kfree(grp); } EXPORT_SYMBOL_GPL(devres_remove_group); /** * devres_release_group - Release resources in a devres group * @dev: Device to release group for * @id: ID of target group, can be NULL * * Release all resources in the group identified by @id. If @id is * NULL, the latest open group is selected. The selected group and * groups properly nested inside the selected group are removed. 
*
 * RETURNS:
 * The number of released non-group resources.
 */
int devres_release_group(struct device *dev, void *id)
{
	struct devres_group *grp;
	unsigned long flags;
	LIST_HEAD(todo);
	int cnt = 0;

	spin_lock_irqsave(&dev->devres_lock, flags);

	grp = find_group(dev, id);
	if (grp) {
		struct list_head *first = &grp->node[0].entry;
		struct list_head *end = &dev->devres_head;

		/*
		 * A closed group ends just past its closing marker; an open
		 * one extends to the end of the device's list.
		 */
		if (!list_empty(&grp->node[1].entry))
			end = grp->node[1].entry.next;

		cnt = remove_nodes(dev, first, end, &todo);
		spin_unlock_irqrestore(&dev->devres_lock, flags);

		/* release callbacks run only after the lock has been dropped */
		release_nodes(dev, &todo);
	} else if (list_empty(&dev->devres_head)) {
		/*
		 * dev is probably dying via devres_release_all(): groups
		 * have already been removed and are in the process of
		 * being released - don't touch and don't warn.
		 */
		spin_unlock_irqrestore(&dev->devres_lock, flags);
	} else {
		/* the list is non-empty yet the group is unknown: warn */
		WARN_ON(1);
		spin_unlock_irqrestore(&dev->devres_lock, flags);
	}

	return cnt;
}
EXPORT_SYMBOL_GPL(devres_release_group);

/*
 * Custom devres actions allow inserting a simple function call
 * into the teardown sequence.
 */

/* Payload of an action devres: the callback and the argument passed to it. */
struct action_devres {
	void *data;
	void (*action)(void *);
};

/*
 * Match callback for action devres: @res is the stored entry and @p the
 * search key; both the action pointer and the data pointer must be equal.
 */
static int devm_action_match(struct device *dev, void *res, void *p)
{
	struct action_devres *devres = res;
	struct action_devres *target = p;

	return devres->action == target->action &&
	       devres->data == target->data;
}

/* Release callback for action devres: invoke the stored action on its data. */
static void devm_action_release(struct device *dev, void *res)
{
	struct action_devres *devres = res;

	devres->action(devres->data);
}

/**
 * __devm_add_action() - add a custom action to list of managed resources
 * @dev: Device that owns the action
 * @action: Function that should be called
 * @data: Pointer to data passed to @action implementation
 * @name: Name of the resource (for debugging purposes)
 *
 * This adds a custom action to the list of managed resources so that
 * it gets executed as part of standard resource unwinding.
*/ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name) { struct action_devres *devres; devres = __devres_alloc_node(devm_action_release, sizeof(struct action_devres), GFP_KERNEL, NUMA_NO_NODE, name); if (!devres) return -ENOMEM; devres->data = data; devres->action = action; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(__devm_add_action); bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data) { struct action_devres devres = { .data = data, .action = action, }; return devres_find(dev, devm_action_release, devm_action_match, &devres); } EXPORT_SYMBOL_GPL(devm_is_action_added); /** * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action * @action: Function implementing the action * @data: Pointer to data passed to @action implementation * * Removes instance of @action previously added by devm_add_action(). * Both action and data should match one of the existing entries. * * In contrast to devm_remove_action(), this function does not WARN() if no * entry could have been found. * * This should only be used if the action is contained in an object with * independent lifetime management, e.g. the Devres rust abstraction. * * Causing the warning from regular driver code most likely indicates an abuse * of the devres API. * * Returns: 0 on success, -ENOENT if no entry could have been found. 
*/
int devm_remove_action_nowarn(struct device *dev,
			      void (*action)(void *),
			      void *data)
{
	/* stack-allocated search key compared by devm_action_match() */
	struct action_devres devres = {
		.data = data,
		.action = action,
	};

	/* devres_destroy() frees the entry without running the action */
	return devres_destroy(dev, devm_action_release, devm_action_match,
			      &devres);
}
EXPORT_SYMBOL_GPL(devm_remove_action_nowarn);

/**
 * devm_release_action() - release previously added custom action
 * @dev: Device that owns the action
 * @action: Function implementing the action
 * @data: Pointer to data passed to @action implementation
 *
 * Releases and removes instance of @action previously added by
 * devm_add_action().  Both action and data should match one of the
 * existing entries.
 */
void devm_release_action(struct device *dev, void (*action)(void *), void *data)
{
	/* stack-allocated search key compared by devm_action_match() */
	struct action_devres devres = {
		.data = data,
		.action = action,
	};

	/* devres_release() runs the action; a missing entry triggers a warning */
	WARN_ON(devres_release(dev, devm_action_release, devm_action_match,
			       &devres));

}
EXPORT_SYMBOL_GPL(devm_release_action);

/*
 * Managed kmalloc/kfree
 */

/* Release callback of managed allocations; it does nothing and also serves to identify them. */
static void devm_kmalloc_release(struct device *dev, void *res)
{
	/* noop */
}

/* Match callback: a managed allocation matches when its payload address equals @data. */
static int devm_kmalloc_match(struct device *dev, void *res, void *data)
{
	return res == data;
}

/**
 * devm_kmalloc - Resource-managed kmalloc
 * @dev: Device to allocate memory for
 * @size: Allocation size
 * @gfp: Allocation gfp flags
 *
 * Managed kmalloc.  Memory allocated with this function is
 * automatically freed on driver detach.  Like all other devres
 * resources, the returned memory is the data[] member of struct devres,
 * which is declared __aligned(ARCH_DMA_MINALIGN).
 *
 * RETURNS:
 * Pointer to allocated memory on success, NULL on failure.
 * ZERO_SIZE_PTR if @size is zero.
 */
void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
{
	struct devres *dr;

	/* zero-sized requests allocate and register nothing */
	if (unlikely(!size))
		return ZERO_SIZE_PTR;

	/* use raw alloc_dr for kmalloc caller tracing */
	dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
	if (unlikely(!dr))
		return NULL;

	/*
	 * This is named devm_kzalloc_release for historical reasons
	 * The initial implementation did not support kmalloc, only kzalloc
	 */
	set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
	devres_add(dev, dr->data);
	return dr->data;
}
EXPORT_SYMBOL_GPL(devm_kmalloc);

/**
 * devm_krealloc - Resource-managed krealloc()
 * @dev: Device to re-allocate memory for
 * @ptr: Pointer to the memory chunk to re-allocate
 * @new_size: New allocation size
 * @gfp: Allocation gfp flags
 *
 * Managed krealloc(). Resizes the memory chunk allocated with devm_kmalloc().
 * Behaves similarly to regular krealloc(): if @ptr is NULL or ZERO_SIZE_PTR,
 * it's the equivalent of devm_kmalloc(). If new_size is zero, it frees the
 * previously allocated memory and returns ZERO_SIZE_PTR. This function doesn't
 * change the order in which the release callback for the re-alloc'ed devres
 * will be called (except when falling back to devm_kmalloc() or when freeing
 * resources when new_size is zero). The contents of the memory are preserved
 * up to the lesser of new and old sizes.
 *
 * RETURNS:
 * @ptr itself when the existing allocation is already large enough, a pointer
 * to the new chunk when one had to be allocated, ZERO_SIZE_PTR when @new_size
 * is zero, or NULL on failure (read-only @ptr, size overflow, allocation
 * failure, or @ptr not managed by @dev).
 */
void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp)
{
	size_t total_new_size, total_old_size;
	struct devres *old_dr, *new_dr;
	unsigned long flags;

	if (unlikely(!new_size)) {
		devm_kfree(dev, ptr);
		return ZERO_SIZE_PTR;
	}

	if (unlikely(ZERO_OR_NULL_PTR(ptr)))
		return devm_kmalloc(dev, new_size, gfp);

	if (WARN_ON(is_kernel_rodata((unsigned long)ptr)))
		/*
		 * We cannot reliably realloc a const string returned by
		 * devm_kstrdup_const().
		 */
		return NULL;

	if (!check_dr_size(new_size, &total_new_size))
		return NULL;

	/* both totals include the struct devres header in front of the payload */
	total_old_size = ksize(container_of(ptr, struct devres, data));
	if (total_old_size == 0) {
		WARN(1, "Pointer doesn't point to dynamically allocated memory.");
		return NULL;
	}

	/*
	 * If new size is smaller or equal to the actual number of bytes
	 * allocated previously - just return the same pointer.
	 */
	if (total_new_size <= total_old_size)
		return ptr;

	/*
	 * Otherwise: allocate new, larger chunk. We need to allocate before
	 * taking the lock as most probably the caller uses GFP_KERNEL.
	 * alloc_dr() will call check_dr_size() to reserve extra memory
	 * for struct devres automatically, so size @new_size user request
	 * is delivered to it directly as devm_kmalloc() does.
	 */
	new_dr = alloc_dr(devm_kmalloc_release,
			  new_size, gfp, dev_to_node(dev));
	if (!new_dr)
		return NULL;

	/*
	 * The spinlock protects the linked list against concurrent
	 * modifications but not the resource itself.
	 */
	spin_lock_irqsave(&dev->devres_lock, flags);

	old_dr = find_dr(dev, devm_kmalloc_release, devm_kmalloc_match, ptr);
	if (!old_dr) {
		spin_unlock_irqrestore(&dev->devres_lock, flags);
		/* new_dr was never linked, so a plain kfree() is enough */
		kfree(new_dr);
		WARN(1, "Memory chunk not managed or managed by a different device.");
		return NULL;
	}

	/* the new node takes the old node's place on the devres list */
	replace_dr(dev, &old_dr->node, &new_dr->node);

	spin_unlock_irqrestore(&dev->devres_lock, flags);

	/*
	 * We can copy the memory contents after releasing the lock as we're
	 * no longer modifying the list links.
	 */
	/* copy the old payload only: old total size minus the header */
	memcpy(new_dr->data, old_dr->data,
	       total_old_size - offsetof(struct devres, data));
	/*
	 * Same for releasing the old devres - it's now been removed from the
	 * list. This is also the reason why we must not use devm_kfree() - the
	 * links are no longer valid.
	 */
	kfree(old_dr);

	return new_dr->data;
}
EXPORT_SYMBOL_GPL(devm_krealloc);

/**
 * devm_kstrdup - Allocate resource managed space and
 *                copy an existing string into that.
* @dev: Device to allocate memory for * @s: the string to duplicate * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) { if (!s) return NULL; return devm_kmemdup(dev, s, strlen(s) + 1, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup); /** * devm_kstrdup_const - resource managed conditional string duplication * @dev: device for which to duplicate the string * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory * * Strings allocated by devm_kstrdup_const will be automatically freed when * the associated device is detached. * * RETURNS: * Source string if it is in .rodata section otherwise it falls back to * devm_kstrdup. */ const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp) { if (is_kernel_rodata((unsigned long)s)) return s; return devm_kstrdup(dev, s, gfp); } EXPORT_SYMBOL_GPL(devm_kstrdup_const); /** * devm_kvasprintf - Allocate resource managed space and format a string * into that. * @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @ap: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) { unsigned int len; char *p; va_list aq; va_copy(aq, ap); len = vsnprintf(NULL, 0, fmt, aq); va_end(aq); p = devm_kmalloc(dev, len+1, gfp); if (!p) return NULL; vsnprintf(p, len+1, fmt, ap); return p; } EXPORT_SYMBOL(devm_kvasprintf); /** * devm_kasprintf - Allocate resource managed space and format a string * into that. 
* @dev: Device to allocate memory for * @gfp: the GFP mask used in the devm_kmalloc() call when * allocating memory * @fmt: The printf()-style format string * @...: Arguments for the format string * RETURNS: * Pointer to allocated string on success, NULL on failure. */ char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...) { va_list ap; char *p; va_start(ap, fmt); p = devm_kvasprintf(dev, gfp, fmt, ap); va_end(ap); return p; } EXPORT_SYMBOL_GPL(devm_kasprintf); /** * devm_kfree - Resource-managed kfree * @dev: Device this memory belongs to * @p: Memory to free * * Free memory allocated with devm_kmalloc(). */ void devm_kfree(struct device *dev, const void *p) { int rc; /* * Special cases: pointer to a string in .rodata returned by * devm_kstrdup_const() or NULL/ZERO ptr. */ if (unlikely(is_kernel_rodata((unsigned long)p) || ZERO_OR_NULL_PTR(p))) return; rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, (void *)p); WARN_ON(rc); } EXPORT_SYMBOL_GPL(devm_kfree); /** * devm_kmemdup - Resource-managed kmemdup * @dev: Device this memory belongs to * @src: Memory region to duplicate * @len: Memory region length * @gfp: GFP mask to use * * Duplicate region of a memory using resource managed kmalloc */ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) { void *p; p = devm_kmalloc(dev, len, gfp); if (p) memcpy(p, src, len); return p; } EXPORT_SYMBOL_GPL(devm_kmemdup); struct pages_devres { unsigned long addr; unsigned int order; }; static int devm_pages_match(struct device *dev, void *res, void *p) { struct pages_devres *devres = res; struct pages_devres *target = p; return devres->addr == target->addr; } static void devm_pages_release(struct device *dev, void *res) { struct pages_devres *devres = res; free_pages(devres->addr, devres->order); } /** * devm_get_free_pages - Resource-managed __get_free_pages * @dev: Device to allocate memory for * @gfp_mask: Allocation gfp flags * @order: Allocation size is (1 << 
order) pages * * Managed get_free_pages. Memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Address of allocated memory on success, 0 on failure. */ unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order) { struct pages_devres *devres; unsigned long addr; addr = __get_free_pages(gfp_mask, order); if (unlikely(!addr)) return 0; devres = devres_alloc(devm_pages_release, sizeof(struct pages_devres), GFP_KERNEL); if (unlikely(!devres)) { free_pages(addr, order); return 0; } devres->addr = addr; devres->order = order; devres_add(dev, devres); return addr; } EXPORT_SYMBOL_GPL(devm_get_free_pages); /** * devm_free_pages - Resource-managed free_pages * @dev: Device this memory belongs to * @addr: Memory to free * * Free memory allocated with devm_get_free_pages(). Unlike free_pages, * there is no need to supply the @order. */ void devm_free_pages(struct device *dev, unsigned long addr) { struct pages_devres devres = { .addr = addr }; WARN_ON(devres_release(dev, devm_pages_release, devm_pages_match, &devres)); } EXPORT_SYMBOL_GPL(devm_free_pages); static void devm_percpu_release(struct device *dev, void *pdata) { void __percpu *p; p = *(void __percpu **)pdata; free_percpu(p); } static int devm_percpu_match(struct device *dev, void *data, void *p) { struct devres *devr = container_of(data, struct devres, data); return *(void **)devr->data == p; } /** * __devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for * @size: Size of per-cpu memory to allocate * @align: Alignment of per-cpu memory to allocate * * Managed alloc_percpu. Per-cpu memory allocated with this function is * automatically freed on driver detach. * * RETURNS: * Pointer to allocated memory on success, NULL on failure. 
*/
void __percpu *__devm_alloc_percpu(struct device *dev, size_t size,
		size_t align)
{
	void __percpu *pcpu;
	void *slot;

	pcpu = __alloc_percpu(size, align);
	if (!pcpu)
		return NULL;

	/* A pointer-sized devres payload remembers what to free later. */
	slot = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL);
	if (!slot) {
		free_percpu(pcpu);
		return NULL;
	}

	*(void __percpu **)slot = pcpu;
	devres_add(dev, slot);

	return pcpu;
}
EXPORT_SYMBOL_GPL(__devm_alloc_percpu);

/**
 * devm_free_percpu - Resource-managed free_percpu
 * @dev: Device this memory belongs to
 * @pdata: Per-cpu memory to free
 *
 * Free memory allocated with devm_alloc_percpu(). A non-zero return from
 * devres_release() trips a WARN_ON().
 */
void devm_free_percpu(struct device *dev, void __percpu *pdata)
{
	/*
	 * devres_release() is used here, as in devm_free_pages(), to
	 * prevent memory leakage.
	 */
	WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match,
			       (void *)(__force unsigned long)pdata));
}
EXPORT_SYMBOL_GPL(devm_free_percpu);
7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (c) 2012 Taobao.
 * Written by Tao Ma <boyu.mt@taobao.com>
 */

#include <linux/iomap.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
#include <linux/iversion.h>
#include <linux/sched/mm.h>

#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "truncate.h"

/* Name of the xattr (looked up with EXT4_XATTR_INDEX_SYSTEM) holding inline data. */
#define EXT4_XATTR_SYSTEM_DATA	"data"
/* Byte size of EXT4_N_BLOCKS __le32 words, i.e. the i_block area. */
#define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
#define EXT4_INLINE_DOTDOT_OFFSET	2
#define EXT4_INLINE_DOTDOT_SIZE		4

static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
						 struct inode *inode,
						 void **fsdata);

/*
 * Return the cached inline data size (i_inline_size) when an inline
 * data xattr offset is recorded for the inode, 0 otherwise.
 */
static int ext4_get_inline_size(struct inode *inode)
{
	if (EXT4_I(inode)->i_inline_off)
		return EXT4_I(inode)->i_inline_size;

	return 0;
}

/*
 * Compute how many bytes the inline data xattr value could occupy in the
 * in-inode xattr area described by @iloc.
 *
 * Returns 0 when the inode has no in-inode xattr space, when a corrupt
 * entry is found while walking the entries, or when too little room is
 * left. If an inline data xattr already exists (i_inline_off is set),
 * the size of its current value is counted as available.
 */
static int get_max_inline_xattr_value_size(struct inode *inode,
					   struct ext4_iloc *iloc)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry;
	struct ext4_inode *raw_inode;
	void *end;
	int free, min_offs;

	if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
		return 0;

	/* Start from the whole area that follows the xattr ibody header. */
	min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
			EXT4_GOOD_OLD_INODE_SIZE -
			EXT4_I(inode)->i_extra_isize -
			sizeof(struct ext4_xattr_ibody_header);

	/*
	 * We need to subtract another sizeof(__u32) since an in-inode xattr
	 * needs an empty 4 bytes to indicate the gap between the xattr entry
	 * and the name/value pair.
	 */
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return EXT4_XATTR_SIZE(min_offs -
			EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
			EXT4_XATTR_ROUND - sizeof(__u32));

	raw_inode = ext4_raw_inode(iloc);
	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;

	/* Compute min_offs. */
	while (!IS_LAST_ENTRY(entry)) {
		void *next = EXT4_XATTR_NEXT(entry);

		/* An entry reaching the end of the on-disk inode is corrupt. */
		if (next >= end) {
			EXT4_ERROR_INODE(inode,
					 "corrupt xattr in inline inode");
			return 0;
		}
		/* Only values stored in the inode body (no e_value_inum) count. */
		if (!entry->e_value_inum && entry->e_value_size) {
			size_t offs = le16_to_cpu(entry->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
		entry = next;
	}
	/* Gap between the end of the entry table and the lowest value. */
	free = min_offs -
		((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);

	if (EXT4_I(inode)->i_inline_off) {
		entry = (struct ext4_xattr_entry *)
			((void *)raw_inode + EXT4_I(inode)->i_inline_off);

		/* The existing inline data value can be reused. */
		free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
		goto out;
	}

	/* No entry yet: reserve room for the entry itself. */
	free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));

	if (free > EXT4_XATTR_ROUND)
		free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
	else
		free = 0;

out:
	return free;
}

/*
 * Get the maximum size we now can store in an inode.
 * If we can't find the space for a xattr entry, don't use the space
 * of the extents since we have no space to indicate the inline data.
*/
int ext4_get_max_inline_size(struct inode *inode)
{
	int error, max_inline_size;
	struct ext4_iloc iloc;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &iloc);
	if (error) {
		ext4_error_inode_err(inode, __func__, __LINE__, 0, -error,
				     "can't get inode location %lu",
				     inode->i_ino);
		return 0;
	}

	/* xattr_sem is held for reading while the xattr area is inspected. */
	down_read(&EXT4_I(inode)->xattr_sem);
	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);

	brelse(iloc.bh);

	if (!max_inline_size)
		return 0;

	/* The i_block area comes on top of the xattr value space. */
	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}

/*
 * Look for the inline data xattr in the inode body and, when it is found,
 * cache its offset (i_inline_off) and the total inline size
 * (i_inline_size) in the in-memory inode. Returns 0 on success (whether
 * or not the xattr exists) or a negative error.
 *
 * this function does not take xattr_sem, which is OK because it is
 * currently only used in a code path coming from ext4_iget, before
 * the new inode has been unlocked
 */
int ext4_find_inline_data_nolock(struct inode *inode)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		/* Inline data must live in the inode body itself. */
		if (is.s.here->e_value_inum) {
			EXT4_ERROR_INODE(inode, "inline data xattr refers "
					 "to an external xattr inode");
			error = -EFSCORRUPTED;
			goto out;
		}
		/* Offset of the entry relative to the start of the raw inode. */
		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
					(void *)ext4_raw_inode(&is.iloc));
		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	}
out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Copy up to @len bytes of inline data into @buffer: first from i_block
 * (at most EXT4_MIN_INLINE_DATA_SIZE bytes), then from the value of the
 * inline data xattr. Returns the number of bytes copied. @len must not
 * exceed i_inline_size (BUG_ON otherwise).
 */
static int ext4_read_inline_data(struct inode *inode, void *buffer,
				 unsigned int len,
				 struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	int cp_len = 0;
	struct ext4_inode *raw_inode;

	if (!len)
		return 0;

	BUG_ON(len > EXT4_I(inode)->i_inline_size);

	cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE);

	raw_inode = ext4_raw_inode(iloc);
	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

	len -= cp_len;
	buffer += cp_len;

	if (!len)
		goto out;

	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);
	/* Never read more than the xattr value actually holds. */
	len = min_t(unsigned int, len,
		    (unsigned int)le32_to_cpu(entry->e_value_size));

	memcpy(buffer,
	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
	cp_len += len;

out:
	return cp_len;
}

/*
 * Write @len bytes to the inline data area of the inode at offset @pos.
 * The source is @buffer + @pos. Bytes below EXT4_MIN_INLINE_DATA_SIZE go
 * into i_block, the rest into the value of the inline data xattr.
 * Nothing is written when ext4_emergency_state() is set for the
 * superblock. The range must fit in i_inline_size and the inline data
 * xattr must exist (BUG_ON otherwise).
 */
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
				   void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	if (unlikely(ext4_emergency_state(inode->i_sb)))
		return;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	buffer += pos;

	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		/* Part of the range that still falls inside i_block. */
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* From here on @pos is relative to the start of the xattr value. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}

/*
 * Create the inline data xattr for an inode that does not have one yet,
 * sized so that @len bytes fit inline (i_block plus xattr value).
 * On success i_block is zeroed, i_inline_off/i_inline_size are cached,
 * EXT4_INODE_EXTENTS is cleared and EXT4_INODE_INLINE_DATA is set.
 * Finding an existing inline data xattr is reported as -EFSCORRUPTED.
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Only the part beyond i_block needs room in the xattr value. */
	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		value = EXT4_ZERO_XATTR_VALUE;
		len -= EXT4_MIN_INLINE_DATA_SIZE;
	} else {
		value = "";
		len = 0;
	}

	/* Insert the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		EXT4_ERROR_INODE(inode, "unexpected inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error) {
		/* No room for the entry: stop trying inline data for this inode. */
		if (error == -ENOSPC)
			ext4_clear_inode_state(inode,
					       EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
	/*
	 * NOTE(review): presumably ext4_mark_iloc_dirty() drops a buffer
	 * reference, hence the extra get_bh() before the brelse() at out:
	 * - confirm.
	 */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Grow the value of the existing inline data xattr so that @len bytes
 * fit inline. Returns 0 right away when @len already fits in
 * i_inline_size. A missing inline data xattr is reported as
 * -EFSCORRUPTED.
 */
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
				   unsigned int len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	/* If the old space is ok, write the data directly. */
	if (len <= EXT4_I(inode)->i_inline_size)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (is.s.not_found) {
		EXT4_ERROR_INODE(inode, "missing inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	/* New xattr value length; the buffer starts zeroed (kzalloc). */
	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);
	if (!value) {
		error = -ENOMEM;
		goto out;
	}

	/* Fetch the current value into the new buffer. */
	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
				     value, len);
	if (error < 0)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Update the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	kfree(value);
	brelse(is.iloc.bh);
	return error;
}

/*
 * Make sure the inode can hold @len bytes inline, creating or growing
 * the inline data xattr under the xattr write lock. Returns -ENOSPC
 * when EXT4_STATE_MAY_INLINE_DATA is not set or when @len exceeds
 * ext4_get_max_inline_size().
 */
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
				    loff_t len)
{
	int ret, size, no_expand;
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
		return -ENOSPC;

	size = ext4_get_max_inline_size(inode);
	if (size < len)
		return -ENOSPC;

	ext4_write_lock_xattr(inode, &no_expand);

	if (ei->i_inline_off)
		ret = ext4_update_inline_data(handle, inode, len);
	else
		ret = ext4_create_inline_data(handle, inode, len);

	ext4_write_unlock_xattr(inode, &no_expand);
	return ret;
}

/*
 * Remove the inline data xattr and clear the inline state of the inode.
 * i_block (on disk and in ei->i_data) is zeroed. If the filesystem has
 * the extents feature and the inode is a directory, regular file or
 * symlink, EXT4_INODE_EXTENTS is set and the extent tree initialised.
 * Returns 0 when there is no inline data; -ENODATA is mapped to 0.
 * The caller is expected to hold the xattr lock (hence "_nolock").
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	/* A NULL value with zero length is what gets passed to the set call. */
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);
	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);

	if (ext4_has_feature_extents(inode->i_sb)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	/* The cached inline state is reset whatever mark_iloc_dirty returned. */
	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	if (error == -ENODATA)
		error = 0;
	return error;
}

/*
 * Fill the locked folio at index 0 from the inline data, zero the rest
 * of it and mark it uptodate. Returns the byte count reported by
 * ext4_read_inline_data() on success, 0 (after a warning, folio left
 * untouched) when no inline data xattr offset is cached, or a negative
 * error from ext4_get_inode_loc().
 */
static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{
	void *kaddr;
	int ret = 0;
	size_t len;
	struct ext4_iloc iloc;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(!ext4_has_inline_data(inode));
	BUG_ON(folio->index);

	if (!EXT4_I(inode)->i_inline_off) {
		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
			     inode->i_ino);
		goto out;
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		goto out;

	/* Never expose more than i_size bytes of the inline area. */
	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
	BUG_ON(len > PAGE_SIZE);
	kaddr = kmap_local_folio(folio, 0);
	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
	kaddr = folio_zero_tail(folio, len, kaddr + len);
	kunmap_local(kaddr);
	folio_mark_uptodate(folio);
	brelse(iloc.bh);

out:
	return ret;
}

/*
 * Read a folio of an inode with inline data. Returns -EAGAIN, with the
 * folio still locked, when the inode has no inline data; otherwise the
 * folio is unlocked and 0 or a negative error is returned.
 */
int ext4_readpage_inline(struct inode *inode, struct folio *folio)
{
	int ret = 0;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		return -EAGAIN;
	}

	/*
	 * Current inline data can only exist in the 1st page,
	 * So for all the other pages, just set them uptodate.
	 */
	if (!folio->index)
		ret = ext4_read_inline_folio(inode, folio);
	else if (!folio_test_uptodate(folio)) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	}

	up_read(&EXT4_I(inode)->xattr_sem);

	folio_unlock(folio);
	/* A positive byte count from the read is reported as plain success. */
	return ret >= 0 ? 0 : ret;
}

/*
 * Convert an inode with inline data to block-mapped storage: start a
 * handle, read the inline data into folio 0, destroy the inline data
 * xattr and map blocks for the range with ext4_block_write_begin().
 * The whole sequence is retried from the journal start when it fails
 * with -ENOSPC and ext4_should_retry_alloc() agrees. Returns 0 on
 * success (including when the inode has no inline data any more) or a
 * negative error.
 */
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode)
{
	int ret, needed_blocks, no_expand;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct folio *folio = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_chunk_trans_extent(inode, 1);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		/* folio holds an error pointer here, so skip the folio cleanup. */
		goto out_nofolio;
	}

	ext4_write_lock_xattr(inode, &no_expand);
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	from = 0;
	to = ext4_get_inline_size(inode);
	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ext4_fc_track_inode(handle, inode);
	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode)) {
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block_unwritten);
	} else
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block);
	clear_buffer_new(folio_buffers(folio));

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, inode,
					     folio_buffers(folio), from, to,
					     NULL, do_journal_get_write_access);
	}

	if (ret) {
		/*
		 * Failure after the inline data was destroyed: release the
		 * folio, the xattr lock and the handle, then truncate.
		 */
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
		ext4_orphan_add(handle, inode);
		ext4_write_unlock_xattr(inode, &no_expand);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (folio)
		block_commit_write(folio, from, to);
out:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
out_nofolio:
	if (sem_held)
		ext4_write_unlock_xattr(inode, &no_expand);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}

/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
 * Otherwise read the page and make it dirty so that it can be
 * handled in writepages (the i_disksize update is left to the
 * normal ext4_da_write_end).
*/
/*
 * Result as visible in this function:
 *   1     - folio 0 is handed back through @foliop without being unlocked
 *           or released, and the journal handle is left running;
 *   0     - the inline data disappeared while we waited for xattr_sem, or
 *           the conversion helper reported success;
 *   other - error code from one of the steps below.
 * When the inline area cannot hold pos + len bytes (-ENOSPC), the data is
 * converted instead: through the delayed-allocation helper when @da is
 * set (retrying the whole sequence while allocation retries are allowed),
 * otherwise through ext4_convert_inline_data_to_extent().
 */
int ext4_generic_write_inline_data(struct address_space *mapping,
				   struct inode *inode,
				   loff_t pos, unsigned len,
				   struct folio **foliop,
				   void **fsdata, bool da)
{
	struct ext4_iloc iloc;
	struct folio *folio;
	handle_t *handle;
	int alloc_retries = 0;
	int rc;

	rc = ext4_get_inode_loc(inode, &iloc);
	if (rc != 0)
		return rc;

restart:
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		rc = PTR_ERR(handle);
		goto drop_bh;
	}

	rc = ext4_prepare_inline_data(handle, inode, pos + len);
	if (rc == -ENOSPC) {
		/* No room inline: the handle is not needed for the fallback. */
		ext4_journal_stop(handle);
		if (da) {
			rc = ext4_da_convert_inline_data_to_extent(mapping,
								   inode,
								   fsdata);
			if (rc != -ENOSPC ||
			    !ext4_should_retry_alloc(inode->i_sb,
						     &alloc_retries))
				goto drop_bh;
			goto restart;
		}
		brelse(iloc.bh);
		/* Retry inside */
		return ext4_convert_inline_data_to_extent(mapping, inode);
	}
	if (rc != 0)
		goto stop_handle;

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		rc = PTR_ERR(folio);
		goto stop_handle;
	}

	down_read(&EXT4_I(inode)->xattr_sem);

	/* Someone else had converted it to extent */
	if (!ext4_has_inline_data(inode)) {
		rc = 0;
		goto drop_folio;
	}

	/* Fill the folio from the inode only if it is not up to date yet. */
	rc = folio_test_uptodate(folio) ?
		0 : ext4_read_inline_folio(inode, folio);
	if (rc < 0)
		goto drop_folio;

	rc = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
					   EXT4_JTR_NONE);
	if (rc != 0)
		goto drop_folio;

	/* Success: hand the folio to the caller and keep the handle open. */
	up_read(&EXT4_I(inode)->xattr_sem);
	*foliop = folio;
	brelse(iloc.bh);
	return 1;

drop_folio:
	up_read(&EXT4_I(inode)->xattr_sem);
	folio_unlock(folio);
	folio_put(folio);
stop_handle:
	ext4_journal_stop(handle);
drop_bh:
	brelse(iloc.bh);
	return rc;
}

/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can be
 * in the inode also. If not, create the page and the handle, move the data
 * to the page, make it uptodate and let the later code create an extent
 * for it.
*/
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  struct folio **foliop)
{
	/* A write ending past the maximum inline size can never stay inline. */
	if (pos + len > ext4_get_max_inline_size(inode))
		return ext4_convert_inline_data_to_extent(mapping, inode);
	return ext4_generic_write_inline_data(mapping, inode, pos, len,
					      foliop, NULL, false);
}

/*
 * Finish a write into inline data: copy @copied bytes at @pos from @folio
 * into the inode's inline area, release the folio and stop the current
 * journal handle.
 *
 * Returns the first error encountered, otherwise @copied (which is forced
 * to 0 for a short copy into a folio that is not up to date).
 */
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct folio *folio)
{
	handle_t *handle = ext4_journal_current_handle();
	int no_expand;
	void *kaddr;
	struct ext4_iloc iloc;
	int ret = 0, ret2;

	/* A short copy into a non-uptodate folio is treated as nothing copied. */
	if (unlikely(copied < len) && !folio_test_uptodate(folio))
		copied = 0;

	if (likely(copied)) {
		ret = ext4_get_inode_loc(inode, &iloc);
		if (ret) {
			folio_unlock(folio);
			folio_put(folio);
			ext4_std_error(inode->i_sb, ret);
			goto out;
		}
		ext4_write_lock_xattr(inode, &no_expand);
		BUG_ON(!ext4_has_inline_data(inode));

		/*
		 * ei->i_inline_off may have changed since
		 * ext4_write_begin() called
		 * ext4_try_to_write_inline_data()
		 */
		(void) ext4_find_inline_data_nolock(inode);

		kaddr = kmap_local_folio(folio, 0);
		ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
		kunmap_local(kaddr);
		folio_mark_uptodate(folio);
		/* clear dirty flag so that writepages wouldn't work for us. */
		folio_clear_dirty(folio);

		ext4_write_unlock_xattr(inode, &no_expand);
		brelse(iloc.bh);

		/*
		 * It's important to update i_size while still holding folio
		 * lock: page writeout could otherwise come in and zero
		 * beyond i_size.
		 */
		ext4_update_inode_size(inode, pos + copied);
	}
	folio_unlock(folio);
	folio_put(folio);

	/*
	 * Don't mark the inode dirty under folio lock. First, it unnecessarily
	 * makes the holding time of folio lock longer. Second, it forces lock
	 * ordering of folio lock and transaction start for journaling
	 * filesystems.
	 */
	if (likely(copied))
		mark_inode_dirty(inode);
out:
	/*
	 * If we didn't copy as much data as expected, we need to trim back
	 * size of xattr containing inline data.
	 */
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		ext4_orphan_add(handle, inode);

	/* Report the journal-stop error only if nothing failed earlier. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}
	return ret ? ret : copied;
}

/*
 * Try to make the page cache and handle ready for the inline data case.
 * We can call this function in 2 cases:
 * 1. The inode is created and the first write exceeds inline size. We can
 *    clear the inode state safely.
 * 2. The inode has inline data, then we need to read the data, make it
 *    uptodate and dirty so that ext4_da_writepages can handle it. We don't
 *    need to start the journal since the file's metadata isn't changed now.
 *
 * On success in case 2, *fsdata is set to CONVERT_INLINE_DATA.
 */
static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
						 struct inode *inode,
						 void **fsdata)
{
	int ret = 0, inline_size;
	struct folio *folio;

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN,
					mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Case 1: nothing to move, just stop treating it as inline. */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	inline_size = ext4_get_inline_size(inode);

	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ret = ext4_block_write_begin(NULL, folio, 0, inline_size,
				     ext4_da_get_block_prep);
	if (ret) {
		/* Release the lock and folio before truncating the failed write. */
		up_read(&EXT4_I(inode)->xattr_sem);
		folio_unlock(folio);
		folio_put(folio);
		ext4_truncate_failed_write(inode);
		return ret;
	}

	clear_buffer_new(folio_buffers(folio));
	folio_mark_dirty(folio);
	folio_mark_uptodate(folio);
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	*fsdata = (void *)CONVERT_INLINE_DATA;

out:
	up_read(&EXT4_I(inode)->xattr_sem);
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	return ret;
}
#ifdef INLINE_DIR_DEBUG
/*
 * Debug helper: trace every directory entry found in the inline area
 * [inline_start, inline_start + inline_size) and BUG() if an entry fails
 * ext4_check_dir_entry(). Compiles to nothing without INLINE_DIR_DEBUG.
 */
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
			  void *inline_start, int inline_size)
{
	int offset;
	unsigned short de_len;
	struct ext4_dir_entry_2 *de = inline_start;
	void *dlimit = inline_start + inline_size;

	trace_printk("inode %lu\n", dir->i_ino);
	offset = 0;
	while ((void *)de < dlimit) {
		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
		trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
			     offset, de_len, de->name_len, de->name,
			     de->name_len, le32_to_cpu(de->inode));
		if (ext4_check_dir_entry(dir, NULL, de, bh,
					 inline_start, inline_size, offset))
			BUG();

		offset += de_len;
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
	}
}
#else
#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
#endif

/*
 * Add a new entry into an inline dir.
 * Returns 1 once the entry has been inserted. Otherwise the error from
 * ext4_find_dest_de() or ext4_journal_get_write_access() is returned:
 * -ENOSPC if no space is available, -EIO, and -EEXIST if the directory
 * entry already exists.
 */
static int ext4_add_dirent_to_inline(handle_t *handle,
				     struct ext4_filename *fname,
				     struct inode *dir,
				     struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *inline_start, int inline_size)
{
	int		err;
	struct ext4_dir_entry_2	*de;

	/* Locate a slot for the new name inside the given inline area. */
	err = ext4_find_dest_de(dir, iloc->bh, inline_start,
				inline_size, fname, &de);
	if (err)
		return err;

	BUFFER_TRACE(iloc->bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh,
					    EXT4_JTR_NONE);
	if (err)
		return err;
	ext4_insert_dentry(dir, inode, de, inline_size, fname);

	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);

	/*
	 * XXX shouldn't update any times until successful
	 * completion of syscall, but too many callers depend
	 * on this.
	 *
	 * XXX similarly, too many callers depend on
	 * ext4_new_inode() setting the times, but error
	 * recovery deletes the inode, so the worst that can
	 * happen is that the times are slightly out of date
	 * and/or different from the directory change time.
	 */
	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
	ext4_update_dx_flag(dir);
	inode_inc_iversion(dir);
	return 1;
}

/*
 * Return a pointer to the inline-data value stored in the in-inode xattr
 * area: the xattr entry is found at i_inline_off from the raw inode and
 * its value offset is applied relative to IFIRST(header).
 * BUGs if i_inline_off is zero.
 */
static void *ext4_get_inline_xattr_pos(struct inode *inode,
				       struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;

	BUG_ON(!EXT4_I(inode)->i_inline_off);

	header = IHDR(inode, ext4_raw_inode(iloc));
	entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
					    EXT4_I(inode)->i_inline_off);

	return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
}

/*
 * Set the final de to cover the whole block.
 *
 * With a non-zero @old_size the entries in the first @old_size bytes of
 * @de_buf are walked and the last one has its rec_len grown by
 * (@new_size - @old_size). With @old_size == 0 a single empty entry
 * spanning @new_size bytes is written instead.
 */
void ext4_update_final_de(void *de_buf, int old_size, int new_size)
{
	struct ext4_dir_entry_2 *de, *prev_de;
	void *limit;
	int de_len;

	de = de_buf;
	if (old_size) {
		limit = de_buf + old_size;
		/* After the loop prev_de/de_len describe the last entry seen. */
		do {
			prev_de = de;
			de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
			de_buf += de_len;
			de = de_buf;
		} while (de_buf < limit);

		prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
							old_size, new_size);
	} else {
		/* this is just created, so create an empty entry. */
		de->inode = 0;
		de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
	}
}

/*
 * Grow the inline directory into the xattr space.
 * Returns -ENOSPC when the extra room would not exceed the record length
 * of a one-character name, the error from ext4_update_inline_data() on
 * failure, and 0 after the final entry and the size fields were updated.
 */
static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
				  struct ext4_iloc *iloc)
{
	int ret;
	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
	int new_size = get_max_inline_xattr_value_size(dir, iloc);

	if (new_size - old_size <= ext4_dir_rec_len(1, NULL))
		return -ENOSPC;

	ret = ext4_update_inline_data(handle, dir,
				      new_size + EXT4_MIN_INLINE_DATA_SIZE);
	if (ret)
		return ret;

	/* i_inline_size is re-read here rather than reusing new_size. */
	ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
			     EXT4_I(dir)->i_inline_size -
						EXT4_MIN_INLINE_DATA_SIZE);
	dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
	return 0;
}

/*
 * Put @inline_size bytes from @buf back into the inode as inline data
 * after a failed conversion. If the inline area cannot be recreated an
 * emergency message is logged and the data is not restored.
 */
static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *buf, int inline_size)
{
	int ret;

	ret = ext4_create_inline_data(handle, inode, inline_size);
	if (ret) {
		ext4_msg(inode->i_sb, KERN_EMERG,
			"error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
			inode->i_ino, ret);
		return;
	}
	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}

/*
 * Convert the inline data of @inode into the contents of logical block 0.
 * The inline payload is first copied into a temporary buffer; if any step
 * after the inline data has been destroyed fails, that copy is written
 * back through ext4_restore_inline_data(). The "_nolock" suffix suggests
 * the caller holds the xattr lock -- this function takes none itself.
 */
static int ext4_convert_inline_data_nolock(handle_t *handle,
					   struct inode *inode,
					   struct ext4_iloc *iloc)
{
	int error;
	void *buf = NULL;
	struct buffer_head *data_bh = NULL;
	struct ext4_map_blocks map;
	int inline_size;

	inline_size = ext4_get_inline_size(inode);
	buf = kmalloc(inline_size, GFP_NOFS);
	if (!buf) {
		error = -ENOMEM;
		goto out;
	}

	error = ext4_read_inline_data(inode, buf, inline_size, iloc);
	if (error < 0)
		goto out;

	/*
	 * Make sure the inline directory entries pass checks before we try to
	 * convert them, so that we avoid touching stuff that needs fsck.
	 */
	if (S_ISDIR(inode->i_mode)) {
		error = ext4_check_all_de(inode, iloc->bh,
					buf + EXT4_INLINE_DOTDOT_SIZE,
					inline_size - EXT4_INLINE_DOTDOT_SIZE);
		if (error)
			goto out;
	}

	/* From here on a failure must restore the inline data from buf. */
	error = ext4_destroy_inline_data_nolock(handle, inode);
	if (error)
		goto out;

	/* Ask for exactly one block at logical block 0. */
	map.m_lblk = 0;
	map.m_len = 1;
	map.m_flags = 0;
	error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
	if (error < 0)
		goto out_restore;
	if (!(map.m_flags & EXT4_MAP_MAPPED)) {
		error = -EIO;
		goto out_restore;
	}

	data_bh = sb_getblk(inode->i_sb, map.m_pblk);
	if (!data_bh) {
		error = -ENOMEM;
		goto out_restore;
	}

	lock_buffer(data_bh);
	error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh,
					       EXT4_JTR_NONE);
	if (error) {
		unlock_buffer(data_bh);
		error = -EIO;
		goto out_restore;
	}
	memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);

	if (!S_ISDIR(inode->i_mode)) {
		/* Regular data: copy the payload verbatim into the new block. */
		memcpy(data_bh->b_data, buf, inline_size);
		set_buffer_uptodate(data_bh);
		unlock_buffer(data_bh);
		error = ext4_handle_dirty_metadata(handle,
						   inode, data_bh);
	} else {
		unlock_buffer(data_bh);
		/*
		 * NOTE(review): the plain store to i_size looks redundant with
		 * the i_size_write() that immediately follows -- confirm
		 * before removing.
		 */
		inode->i_size = inode->i_sb->s_blocksize;
		i_size_write(inode, inode->i_sb->s_blocksize);
		EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;

		/*
		 * The first 4 bytes of the inline area hold the parent inode
		 * number; the real entries follow EXT4_INLINE_DOTDOT_SIZE.
		 */
		error = ext4_init_dirblock(handle, inode, data_bh,
			  le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode),
			  buf + EXT4_INLINE_DOTDOT_SIZE,
			  inline_size - EXT4_INLINE_DOTDOT_SIZE);
		if (!error)
			error = ext4_mark_inode_dirty(handle, inode);
	}

out_restore:
	if (error)
		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);

out:
	brelse(data_bh);
	kfree(buf);
	return error;
}

/*
 * Try to add the new entry to the inline data.
 * If the entry fits, the result of ext4_add_dirent_to_inline() is returned.
 * If not, the inline dir is extended into the xattr space, and as a last
 * resort its data is copied to a newly created block.
*/
/*
 * Placement is attempted in this order:
 *   1. the i_block area, skipping the leading ".." slot;
 *   2. the xattr area, which is grown first if it is currently empty;
 *   3. conversion of the whole directory to a real block.
 * Any outcome other than -ENOSPC from steps 1 and 2 ends the search.
 * The directory inode is marked dirty on every exit past the initial
 * inode lookup; that error is reported only if nothing failed before it.
 */
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
			      struct inode *dir, struct inode *inode)
{
	struct ext4_iloc iloc;
	void *area;
	int area_len;
	int no_expand;
	int dirty_err;
	int err;

	err = ext4_get_inode_loc(dir, &iloc);
	if (err != 0)
		return err;

	ext4_write_lock_xattr(dir, &no_expand);
	if (!ext4_has_inline_data(dir))
		goto unlock;

	/* Step 1: the fixed i_block area behind the ".." slot. */
	area_len = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
	area = (void *)ext4_raw_inode(&iloc)->i_block +
					EXT4_INLINE_DOTDOT_SIZE;
	err = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
					area, area_len);
	if (err != -ENOSPC)
		goto unlock;

	/* check whether it can be inserted to inline xattr space. */
	area_len = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
	if (area_len == 0) {
		/* Try to use the xattr space.*/
		err = ext4_update_inline_dir(handle, dir, &iloc);
		if (err != 0 && err != -ENOSPC)
			goto unlock;

		area_len = EXT4_I(dir)->i_inline_size -
				EXT4_MIN_INLINE_DATA_SIZE;
	}

	/* Step 2: the xattr area, if there is one by now. */
	if (area_len != 0) {
		area = ext4_get_inline_xattr_pos(dir, &iloc);

		err = ext4_add_dirent_to_inline(handle, fname, dir, inode,
						&iloc, area, area_len);
		if (err != -ENOSPC)
			goto unlock;
	}

	/*
	 * The inline space is filled up, so create a new block for it.
	 * As the extent tree will be created, we have to save the inline
	 * dir first.
	 */
	err = ext4_convert_inline_data_nolock(handle, dir, &iloc);

unlock:
	ext4_write_unlock_xattr(dir, &no_expand);
	dirty_err = ext4_mark_inode_dirty(handle, dir);
	if (unlikely(dirty_err && !err))
		err = dirty_err;
	brelse(iloc.bh);
	return err;
}

/*
 * This function fills a red-black tree with information from an
 * inlined dir.  It returns the number of directory entries loaded
 * into the tree.  If there is an error, the error code is returned
 * instead of a count.
*/
int ext4_inlinedir_to_tree(struct file *dir_file,
			   struct inode *dir, ext4_lblk_t block,
			   struct dx_hash_info *hinfo,
			   __u32 start_hash, __u32 start_minor_hash,
			   int *has_inline_data)
{
	int err = 0, count = 0;
	unsigned int parent_ino;
	int pos;
	struct ext4_dir_entry_2 *de;
	struct inode *inode = file_inode(dir_file);
	int ret, inline_size = 0;
	struct ext4_iloc iloc;
	void *dir_buf = NULL;
	/* scratch entry used to synthesize "." and ".." */
	struct ext4_dir_entry_2 fake;
	struct fscrypt_str tmp_str;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Not inline: tell the caller and return ret (0 at this point). */
		up_read(&EXT4_I(inode)->xattr_sem);
		*has_inline_data = 0;
		goto out;
	}

	inline_size = ext4_get_inline_size(inode);
	dir_buf = kmalloc(inline_size, GFP_NOFS);
	if (!dir_buf) {
		ret = -ENOMEM;
		up_read(&EXT4_I(inode)->xattr_sem);
		goto out;
	}

	/* Work on a private copy so xattr_sem can be dropped before the walk. */
	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);
	if (ret < 0)
		goto out;

	pos = 0;
	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
	while (pos < inline_size) {
		/*
		 * As inlined dir doesn't store any information about '.' and
		 * only the inode number of '..' is stored, we have to handle
		 * them differently.
		 */
		if (pos == 0) {
			fake.inode = cpu_to_le32(inode->i_ino);
			fake.name_len = 1;
			memcpy(fake.name, ".", 2);
			fake.rec_len = ext4_rec_len_to_disk(
					  ext4_dir_rec_len(fake.name_len, NULL),
					  inline_size);
			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
			de = &fake;
			pos = EXT4_INLINE_DOTDOT_OFFSET;
		} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
			fake.inode = cpu_to_le32(parent_ino);
			fake.name_len = 2;
			memcpy(fake.name, "..", 3);
			fake.rec_len = ext4_rec_len_to_disk(
					  ext4_dir_rec_len(fake.name_len, NULL),
					  inline_size);
			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
			de = &fake;
			pos = EXT4_INLINE_DOTDOT_SIZE;
		} else {
			de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
			/* pos is advanced before the check, so it is passed post-increment */
			pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
			if (ext4_check_dir_entry(inode, dir_file, de,
					 iloc.bh, dir_buf,
					 inline_size, pos)) {
				/* A bad entry ends the walk; report what was stored so far. */
				ret = count;
				goto out;
			}
		}

		if (ext4_hash_in_dirent(dir)) {
			hinfo->hash = EXT4_DIRENT_HASH(de);
			hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
		} else {
			err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
			if (err) {
				ret = err;
				goto out;
			}
		}
		/* Skip entries that sort before the requested starting hash. */
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		/* Unused slots carry inode number 0. */
		if (de->inode == 0)
			continue;
		tmp_str.name = de->name;
		tmp_str.len = de->name_len;
		err = ext4_htree_store_dirent(dir_file, hinfo->hash,
					      hinfo->minor_hash, de, &tmp_str);
		if (err) {
			ret = err;
			goto out;
		}
		count++;
	}
	ret = count;
out:
	kfree(dir_buf);
	brelse(iloc.bh);
	return ret;
}

/*
 * So this function is called when the volume is mkfsed with
 * dir_index disabled. In order to keep f_pos persistent
 * after we convert from an inlined dir to a block-based one,
 * we just pretend that we are a normal dir and return the
 * offset as if '.' and '..' really take place.
*
 */
int ext4_read_inline_dir(struct file *file,
			 struct dir_context *ctx,
			 int *has_inline_data)
{
	unsigned int offset, parent_ino;
	int i;
	struct ext4_dir_entry_2 *de;
	struct super_block *sb;
	struct inode *inode = file_inode(file);
	int ret, inline_size = 0;
	struct ext4_iloc iloc;
	void *dir_buf = NULL;
	int dotdot_offset, dotdot_size, extra_offset, extra_size;
	struct dir_private_info *info = file->private_data;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		/* Not inline: tell the caller and return ret (0 at this point). */
		up_read(&EXT4_I(inode)->xattr_sem);
		*has_inline_data = 0;
		goto out;
	}

	inline_size = ext4_get_inline_size(inode);
	dir_buf = kmalloc(inline_size, GFP_NOFS);
	if (!dir_buf) {
		ret = -ENOMEM;
		up_read(&EXT4_I(inode)->xattr_sem);
		goto out;
	}

	/* Iterate over a private copy so xattr_sem is not held while emitting. */
	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);
	if (ret < 0)
		goto out;

	ret = 0;
	sb = inode->i_sb;
	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
	offset = ctx->pos;

	/*
	 * dotdot_offset and dotdot_size is the real offset and
	 * size for ".." and "." if the dir is block based while
	 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE.
	 * So we will use extra_offset and extra_size to indicate them
	 * during the inline dir iteration.
	 */
	dotdot_offset = ext4_dir_rec_len(1, NULL);
	dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL);
	extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
	extra_size = extra_offset + inline_size;

	/*
	 * If the cookie has changed since the last call to
	 * readdir(2), then we might be pointing to an invalid
	 * dirent right now.  Scan from the start of the inline
	 * dir to make sure.
	 */
	if (!inode_eq_iversion(inode, info->cookie)) {
		for (i = 0; i < extra_size && i < offset;) {
			/*
			 * "." is with offset 0 and
			 * ".." is dotdot_offset.
			 */
			if (!i) {
				i = dotdot_offset;
				continue;
			} else if (i == dotdot_offset) {
				i = dotdot_size;
				continue;
			}
			/* for other entry, the real offset in
			 * the buf has to be tuned accordingly.
			 */
			de = (struct ext4_dir_entry_2 *)
				(dir_buf + i - extra_offset);
			/* It's too expensive to do a full
			 * dirent test each time round this
			 * loop, but we do have to test at
			 * least that it is non-zero.  A
			 * failure will be detected in the
			 * dirent test below. */
			if (ext4_rec_len_from_disk(de->rec_len, extra_size)
				< ext4_dir_rec_len(1, NULL))
				break;
			i += ext4_rec_len_from_disk(de->rec_len,
						    extra_size);
		}
		/* Resume from the first entry boundary at or after the old position. */
		offset = i;
		ctx->pos = offset;
		info->cookie = inode_query_iversion(inode);
	}

	while (ctx->pos < extra_size) {
		if (ctx->pos == 0) {
			if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR))
				goto out;
			ctx->pos = dotdot_offset;
			continue;
		}

		if (ctx->pos == dotdot_offset) {
			if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR))
				goto out;
			ctx->pos = dotdot_size;
			continue;
		}

		de = (struct ext4_dir_entry_2 *)
			(dir_buf + ctx->pos - extra_offset);
		if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf,
					 extra_size, ctx->pos))
			goto out;
		/* Entries with inode number 0 are unused and not emitted. */
		if (le32_to_cpu(de->inode)) {
			if (!dir_emit(ctx, de->name, de->name_len,
				      le32_to_cpu(de->inode),
				      get_dtype(sb, de->file_type)))
				goto out;
		}
		ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size);
	}
out:
	kfree(dir_buf);
	brelse(iloc.bh);
	return ret;
}

/*
 * Read an inline symlink target into a freshly allocated buffer.
 * Returns the buffer (terminated through nd_terminate_link()) or an
 * ERR_PTR() on failure. Nothing in this function frees the buffer on
 * success, so the caller presumably owns it.
 */
void *ext4_read_inline_link(struct inode *inode)
{
	struct ext4_iloc iloc;
	int ret, inline_size;
	void *link;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ERR_PTR(ret);

	/* Preset the error for the allocation-failure path below. */
	ret = -ENOMEM;
	inline_size = ext4_get_inline_size(inode);
	/* One extra byte beyond the inline payload for the terminator. */
	link = kmalloc(inline_size + 1, GFP_NOFS);
	if (!link)
		goto out;

	ret = ext4_read_inline_data(inode, link, inline_size, &iloc);
	if (ret < 0) {
		kfree(link);
		goto out;
	}
	nd_terminate_link(link, inode->i_size, ret);
out:
	if (ret < 0)
		link = ERR_PTR(ret);
	brelse(iloc.bh);
	return link;
}

/*
 * Return the inode's buffer head and point *parent_de at the start of
 * i_block, where the inline directory keeps its ".." entry.
 * On failure NULL is returned and *retval holds the error. On success the
 * buffer head is returned without being released here.
 */
struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
					struct ext4_dir_entry_2 **parent_de,
					int *retval)
{
	struct ext4_iloc iloc;

	*retval = ext4_get_inode_loc(inode, &iloc);
	if (*retval)
		return NULL;

	*parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;

	return iloc.bh;
}

/*
 * Try to create the inline data for the new dir.
 * If it succeeds, return 0, otherwise return the error.
 * In case of ENOSPC, the caller should create the normal disk layout dir.
 */
int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent,
			       struct inode *inode)
{
	int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE;
	struct ext4_iloc iloc;
	struct ext4_dir_entry_2 *de;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

	ret = ext4_prepare_inline_data(handle, inode, inline_size);
	if (ret)
		goto out;

	/*
	 * For inline dir, we only save the inode information for the ".."
	 * and create a fake dentry to cover the left space.
	 */
	de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block;
	de->inode = cpu_to_le32(parent->i_ino);
	de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE);
	de->inode = 0;
	de->rec_len = ext4_rec_len_to_disk(
				inline_size - EXT4_INLINE_DOTDOT_SIZE,
				inline_size);
	set_nlink(inode, 2);
	inode->i_size = EXT4_I(inode)->i_disksize = inline_size;
out:
	brelse(iloc.bh);
	return ret;
}

/*
 * Look up @fname in an inline directory, first in the i_block area and
 * then in the xattr area if one exists.
 *
 * Returns the inode's buffer head (still referenced) when
 * ext4_search_dir() reports a match (1), an ERR_PTR() on a negative
 * error, and NULL otherwise. *has_inline_data is cleared when the
 * directory turns out not to be inline.
 */
struct buffer_head *ext4_find_inline_entry(struct inode *dir,
					struct ext4_filename *fname,
					struct ext4_dir_entry_2 **res_dir,
					int *has_inline_data)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int ret;
	void *inline_start;
	int inline_size;

	ret = ext4_get_inode_loc(dir, &is.iloc);
	if (ret)
		return ERR_PTR(ret);

	down_read(&EXT4_I(dir)->xattr_sem);

	ret = ext4_xattr_ibody_find(dir, &i, &is);
	if (ret)
		goto out;

	if (!ext4_has_inline_data(dir)) {
		*has_inline_data = 0;
		goto out;
	}

	/* First search area: i_block behind the ".." slot. */
	inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block +
						EXT4_INLINE_DOTDOT_SIZE;
	inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
	ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
			      dir, fname, 0, res_dir);
	if (ret == 1)
		goto out_find;
	if (ret < 0)
		goto out;

	/* Nothing is stored in the xattr area, so there is nowhere else to look. */
	if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
		goto out;

	/* Second search area: the remainder stored in the xattr. */
	inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc);
	inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;

	ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
			      dir, fname, 0, res_dir);
	if (ret == 1)
		goto out_find;

out:
	/* No match: drop the buffer and encode the outcome in the pointer. */
	brelse(is.iloc.bh);
	if (ret < 0)
		is.iloc.bh = ERR_PTR(ret);
	else
		is.iloc.bh = NULL;
out_find:
	up_read(&EXT4_I(dir)->xattr_sem);
	return is.iloc.bh;
}

/*
 * Delete entry @de_del from an inline directory. Whether the entry lives
 * in the i_block area or in the xattr area is decided from its distance
 * to the start of i_block. *has_inline_data is cleared when the directory
 * is not inline.
 */
int ext4_delete_inline_entry(handle_t *handle,
			     struct inode *dir,
			     struct ext4_dir_entry_2 *de_del,
			     struct buffer_head *bh,
			     int *has_inline_data)
{
	int err, inline_size, no_expand;
	struct ext4_iloc iloc;
	void *inline_start;

	err = ext4_get_inode_loc(dir, &iloc);
	if (err)
		return err;

	ext4_write_lock_xattr(dir, &no_expand);
	if (!ext4_has_inline_data(dir)) {
		*has_inline_data = 0;
		goto out;
	}

	if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) <
		EXT4_MIN_INLINE_DATA_SIZE) {
		inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
					EXT4_INLINE_DOTDOT_SIZE;
		inline_size = EXT4_MIN_INLINE_DATA_SIZE -
				EXT4_INLINE_DOTDOT_SIZE;
	} else {
		inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
		inline_size = ext4_get_inline_size(dir) -
				EXT4_MIN_INLINE_DATA_SIZE;
	}

	BUFFER_TRACE(bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
					    EXT4_JTR_NONE);
	if (err)
		goto out;

	err = ext4_generic_delete_entry(dir, de_del, bh,
					inline_start, inline_size, 0);
	if (err)
		goto out;

	ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size);
out:
	ext4_write_unlock_xattr(dir, &no_expand);
	if (likely(err == 0))
		err = ext4_mark_inode_dirty(handle, dir);
	brelse(iloc.bh);
	/* -ENOENT is the only result not passed to ext4_std_error(). */
	if (err != -ENOENT)
		ext4_std_error(dir->i_sb, err);
	return err;
}

/*
 * Get the inline dentry at offset.
*/ static inline struct ext4_dir_entry_2 * ext4_get_inline_entry(struct inode *inode, struct ext4_iloc *iloc, unsigned int offset, void **inline_start, int *inline_size) { void *inline_pos; BUG_ON(offset > ext4_get_inline_size(inode)); if (offset < EXT4_MIN_INLINE_DATA_SIZE) { inline_pos = (void *)ext4_raw_inode(iloc)->i_block; *inline_size = EXT4_MIN_INLINE_DATA_SIZE; } else { inline_pos = ext4_get_inline_xattr_pos(inode, iloc); offset -= EXT4_MIN_INLINE_DATA_SIZE; *inline_size = ext4_get_inline_size(inode) - EXT4_MIN_INLINE_DATA_SIZE; } if (inline_start) *inline_start = inline_pos; return (struct ext4_dir_entry_2 *)(inline_pos + offset); } bool empty_inline_dir(struct inode *dir, int *has_inline_data) { int err, inline_size; struct ext4_iloc iloc; size_t inline_len; void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; ret = true; goto out; } de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; if (!le32_to_cpu(de->inode)) { ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); goto out; } inline_len = ext4_get_inline_size(dir); offset = EXT4_INLINE_DOTDOT_SIZE; while (offset < inline_len) { de = ext4_get_inline_entry(dir, &iloc, offset, &inline_pos, &inline_size); if (ext4_check_dir_entry(dir, NULL, de, iloc.bh, inline_pos, inline_size, offset)) { ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - " "inode %u, rec_len %u, name_len %d" "inline size %d", dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); goto out; } if (le32_to_cpu(de->inode)) { goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); return ret; } int 
ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { int ret, no_expand; ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); ext4_write_unlock_xattr(inode, &no_expand); return ret; } int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) { __u64 addr; int error = -EAGAIN; struct ext4_iloc iloc; down_read(&EXT4_I(inode)->xattr_sem); if (!ext4_has_inline_data(inode)) goto out; error = ext4_get_inode_loc(inode, &iloc); if (error) goto out; addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; addr += offsetof(struct ext4_inode, i_block); brelse(iloc.bh); iomap->addr = addr; iomap->offset = 0; iomap->length = min_t(loff_t, ext4_get_inline_size(inode), i_size_read(inode)); iomap->type = IOMAP_INLINE; iomap->flags = 0; out: up_read(&EXT4_I(inode)->xattr_sem); return error; } int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; int inline_size, value_len, needed_blocks, no_expand, err = 0; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; struct ext4_xattr_info i = { .name_index = EXT4_XATTR_INDEX_SYSTEM, .name = EXT4_XATTR_SYSTEM_DATA, }; needed_blocks = ext4_chunk_trans_extent(inode, 1); handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); if (IS_ERR(handle)) return PTR_ERR(handle); ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { ext4_write_unlock_xattr(inode, &no_expand); *has_inline = 0; ext4_journal_stop(handle); return 0; } if ((err = ext4_orphan_add(handle, inode)) != 0) goto out; if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) goto out; down_write(&EXT4_I(inode)->i_data_sem); i_size = inode->i_size; inline_size = ext4_get_inline_size(inode); EXT4_I(inode)->i_disksize = i_size; if (i_size < inline_size) { /* * if there's inline data to truncate and this file was * converted to extents 
after that inline data was written, * the extent status cache must be cleared to avoid leaving * behind stale delayed allocated extent entries */ if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); /* Clear the content in the xattr space. */ if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) goto out_error; if (is.s.not_found) { EXT4_ERROR_INODE(inode, "missing inline data xattr"); err = -EFSCORRUPTED; goto out_error; } value_len = le32_to_cpu(is.s.here->e_value_size); value = kmalloc(value_len, GFP_NOFS); if (!value) { err = -ENOMEM; goto out_error; } err = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, value_len); if (err <= 0) goto out_error; i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; err = ext4_xattr_ibody_set(handle, inode, &i, &is); if (err) goto out_error; } /* Clear the content within i_blocks. */ if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; memset(p + i_size, 0, EXT4_MIN_INLINE_DATA_SIZE - i_size); } EXT4_I(inode)->i_inline_size = i_size < EXT4_MIN_INLINE_DATA_SIZE ? 
EXT4_MIN_INLINE_DATA_SIZE : i_size; } out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); if (err == 0) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); err = ext4_mark_inode_dirty(handle, inode); if (IS_SYNC(inode)) ext4_handle_sync(handle); } ext4_journal_stop(handle); return err; } int ext4_convert_inline_data(struct inode *inode) { int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { /* * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is * cleared. This means we are in the middle of moving of * inline data to delay allocated block. Just force writeout * here to finish conversion. */ error = filemap_flush(inode->i_mapping); if (error) return error; if (!ext4_has_inline_data(inode)) return 0; } needed_blocks = ext4_chunk_trans_extent(inode, 1); iloc.bh = NULL; error = ext4_get_inode_loc(inode, &iloc); if (error) return error; handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto out_free; } ext4_write_lock_xattr(inode, &no_expand); if (ext4_has_inline_data(inode)) error = ext4_convert_inline_data_nolock(handle, inode, &iloc); ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); return error; }
65 2 24 51 49 2 266 266 28 27 7 28 25 18 18 48 48 50 6 33 8 48 27 5 1 33 32 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_IP_TUNNELS_H #define __NET_IP_TUNNELS_H 1 #include <linux/if_tunnel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> #include <linux/bitops.h> #include <net/dsfield.h> #include <net/flow.h> #include <net/gro_cells.h> #include <net/inet_dscp.h> #include <net/inet_ecn.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/lwtunnel.h> #include <net/dst_cache.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #endif /* Keep error state on tunnel for 30 sec */ #define IPTUNNEL_ERR_TIMEO (30*HZ) /* Used to memset ip_tunnel padding. */ #define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst) /* Used to memset ipv4 address padding. */ #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4)) #define __ipt_flag_op(op, ...) 
\ op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) #define IP_TUNNEL_DECLARE_FLAGS(...) \ __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) #define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) #define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) #define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) #define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) #define ip_tunnel_flags_empty(...) \ __ipt_flag_op(bitmap_empty, __VA_ARGS__) #define ip_tunnel_flags_intersect(...) \ __ipt_flag_op(bitmap_intersects, __VA_ARGS__) #define ip_tunnel_flags_subset(...) \ __ipt_flag_op(bitmap_subset, __VA_ARGS__) struct ip_tunnel_key { __be64 tun_id; union { struct { __be32 src; __be32 dst; } ipv4; struct { struct in6_addr src; struct in6_addr dst; } ipv6; } u; IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; u8 tos; /* TOS for IPv4, TC for IPv6 */ u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; }; struct ip_tunnel_encap { u16 type; u16 flags; __be16 sport; __be16 dport; }; /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ #define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. 
*/ #define IP_TUNNEL_OPTS_MAX \ GENMASK((sizeof_field(struct ip_tunnel_info, \ options_len) * BITS_PER_BYTE) - 1, 0) #define ip_tunnel_info_opts(info) \ _Generic(info, \ const struct ip_tunnel_info * : ((const void *)(info)->options),\ struct ip_tunnel_info * : ((void *)(info)->options)\ ) struct ip_tunnel_info { struct ip_tunnel_key key; struct ip_tunnel_encap encap; #ifdef CONFIG_DST_CACHE struct dst_cache dst_cache; #endif u8 options_len; u8 mode; u8 options[] __aligned_largest __counted_by(options_len); }; /* 6rd prefix/relay information */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm { struct in6_addr prefix; __be32 relay_prefix; u16 prefixlen; u16 relay_prefixlen; }; #endif struct ip_tunnel_prl_entry { struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; }; struct metadata_dst; /* Kernel-side variant of ip_tunnel_parm */ struct ip_tunnel_parm_kern { char name[IFNAMSIZ]; IP_TUNNEL_DECLARE_FLAGS(i_flags); IP_TUNNEL_DECLARE_FLAGS(o_flags); __be32 i_key; __be32 o_key; int link; struct iphdr iph; }; struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; struct net_device *dev; netdevice_tracker dev_tracker; struct net *net; /* netns for packet i/o */ unsigned long err_time; /* Time when the last ICMP error * arrived */ int err_count; /* Number of arrived ICMP errors */ /* These four fields used only by GRE */ u32 i_seqno; /* The last seen seqno */ atomic_t o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ /* These four fields used only by ERSPAN */ u32 index; /* ERSPAN type II index */ u8 erspan_ver; /* ERSPAN version */ u8 dir; /* ERSPAN direction */ u16 hwid; /* ERSPAN hardware ID */ struct dst_cache dst_cache; struct ip_tunnel_parm_kern parms; int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ int hlen; /* tun_hlen + encap_hlen */ struct ip_tunnel_encap encap; /* for SIT */ #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif struct 
ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ unsigned int ip_tnl_net_id; struct gro_cells gro_cells; __u32 fwmark; bool collect_md; bool ignore_df; }; struct tnl_ptk_info { IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; int hdr_len; }; #define PACKET_RCVD 0 #define PACKET_REJECT 1 #define PACKET_NEXT 2 #define IP_TNL_HASH_BITS 7 #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS) struct ip_tunnel_net { struct net_device *fb_tunnel_dev; struct rtnl_link_ops *rtnl_link_ops; struct hlist_head tunnels[IP_TNL_HASH_SIZE]; struct ip_tunnel __rcu *collect_md_tun; int type; }; static inline void ip_tunnel_set_options_present(unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); ip_tunnel_flags_or(flags, flags, present); } static inline void ip_tunnel_clear_options_present(unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); __ipt_flag_op(bitmap_andnot, flags, flags, present); } static inline bool ip_tunnel_is_options_present(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(present) = { }; __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); return ip_tunnel_flags_intersect(flags, present); } static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); 
__set_bit(IP_TUNNEL_VTI_BIT, supp); return ip_tunnel_flags_subset(flags, supp); } static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) { ip_tunnel_flags_zero(dst); bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); } static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) { __be16 ret; ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); if (test_bit(IP_TUNNEL_VTI_BIT, flags)) ret |= VTI_ISVTI; return ret; } static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, __be64 tun_id, const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; key->u.ipv4.dst = daddr; memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); key->tos = tos; key->ttl = ttl; key->label = label; ip_tunnel_flags_copy(key->tun_flags, tun_flags); /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. * E.g: GRE over IPSEC, the tp_src and tp_port are zero. */ key->tp_src = tp_src; key->tp_dst = tp_dst; /* Clear struct padding. */ if (sizeof(*key) != IP_TUNNEL_KEY_SIZE) memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE, 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE); } static inline bool ip_tunnel_dst_cache_usable(const struct sk_buff *skb, const struct ip_tunnel_info *info) { if (skb->mark) return false; return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); } static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info) { return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } static inline __be64 key32_to_tunnel_id(__be32 key) { #ifdef __BIG_ENDIAN return (__force __be64)key; #else return (__force __be64)((__force u64)key << 32); #endif } /* Returns the least-significant 32 bits of a __be64. 
*/ static inline __be32 tunnel_id_to_key32(__be64 tun_id) { #ifdef __BIG_ENDIAN return (__force __be32)tun_id; #else return (__force __be32)((__force u64)tun_id >> 32); #endif } #ifdef CONFIG_INET static inline void ip_tunnel_init_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, __u32 mark, __u32 tun_inner_hash, __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); if (oif) { fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index(net, oif); /* Legacy VRF/l3mdev use case */ fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif; } fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_net(struct net *net, unsigned int id, struct rtnl_link_ops *ops, struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd); bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, const void __user *data); bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int 
ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key); void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info); int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap); void ip_tunnel_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm_kern *parms); extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4); int (*err_handler)(struct sk_buff *skb, u32 info); }; #define MAX_IPTUN_ENCAP_OPS 8 extern const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS]; int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, unsigned int num); int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap); static inline enum skb_drop_reason pskb_inet_may_pull_reason(struct sk_buff *skb) { int nhlen; switch (skb->protocol) { #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): nhlen = sizeof(struct ipv6hdr); break; #endif case htons(ETH_P_IP): nhlen = sizeof(struct iphdr); break; default: nhlen = 0; } return pskb_network_may_pull_reason(skb, nhlen); } static inline bool 
pskb_inet_may_pull(struct sk_buff *skb) { return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET; } /* Variant of pskb_inet_may_pull(). */ static inline enum skb_drop_reason skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit) { int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; enum skb_drop_reason reason; /* Essentially this is skb_protocol(skb, true) * And we get MAC len. */ if (eth_type_vlan(type)) type = __vlan_get_protocol(skb, type, &maclen); switch (type) { #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): nhlen = sizeof(struct ipv6hdr); break; #endif case htons(ETH_P_IP): nhlen = sizeof(struct iphdr); break; } /* For ETH_P_IPV6/ETH_P_IP we make sure to pull * a base network header in skb->head. */ reason = pskb_may_pull_reason(skb, maclen + nhlen); if (reason) return reason; skb_set_network_header(skb, maclen); return SKB_NOT_DROPPED_YET; } static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; int hlen = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->encap_hlen)) hlen = ops->encap_hlen(e); rcu_read_unlock(); return hlen; } static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel_encap *e, u8 *protocol, struct flowi4 *fl4) { const struct ip_tunnel_encap_ops *ops; int ret = -EINVAL; if (e->type == TUNNEL_ENCAP_NONE) return 0; if (e->type >= MAX_IPTUN_ENCAP_OPS) return -EINVAL; rcu_read_lock(); ops = rcu_dereference(iptun_encaps[e->type]); if (likely(ops && ops->build_header)) ret = ops->build_header(skb, e, protocol, fl4); rcu_read_unlock(); return ret; } /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IP)) return iph->tos; 
else if (payload_protocol == htons(ETH_P_IPV6)) return ipv6_get_dsfield((const struct ipv6hdr *)iph); else return 0; } static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IPV6)) return ip6_flowlabel((const struct ipv6hdr *)iph); else return 0; } static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { __be16 payload_protocol = skb_protocol(skb, true); if (payload_protocol == htons(ETH_P_IP)) return iph->ttl; else if (payload_protocol == htons(ETH_P_IPV6)) return ((const struct ipv6hdr *)iph)->hop_limit; else return 0; } /* Propagate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) { u8 inner = ip_tunnel_get_dsfield(iph, skb); return INET_ECN_encapsulate(tos, inner); } int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool raw_proto, bool xnet); static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, bool xnet) { return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); } void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet, u16 ipcb_flags); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) { if (skb_is_gso(skb)) { int err; err = skb_unclone(skb, GFP_ATOMIC); if (unlikely(err)) return err; skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> NETIF_F_GSO_SHIFT); } skb->encapsulation = 0; return 0; } static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { struct 
pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->tx_bytes, pkt_len); u64_stats_inc(&tstats->tx_packets); u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); return; } if (pkt_len < 0) { DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_aborted_errors); } else { DEV_STATS_INC(dev, tx_dropped); } } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { memcpy(to, ip_tunnel_info_opts(info), info->options_len); } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, flags); } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return (struct ip_tunnel_info *)lwtstate->data; } DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); /* Returns > 0 if metadata should be collected */ static inline int ip_tunnel_collect_metadata(void) { return static_branch_unlikely(&ip_tunnel_metadata_cnt); } void __init ip_tunnel_core_init(void); void ip_tunnel_need_metadata(void); void ip_tunnel_unneed_metadata(void); #else /* CONFIG_INET */ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) { return NULL; } static inline void ip_tunnel_need_metadata(void) { } static inline void ip_tunnel_unneed_metadata(void) { } static inline void ip_tunnel_info_opts_get(void *to, const struct ip_tunnel_info *info) { } static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, const unsigned long *flags) { info->options_len = 0; } #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */
5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 // SPDX-License-Identifier: GPL-2.0-only /* * crc16.c */ #include <linux/crc16.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> /** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ static const u16 crc16_table[256] = { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 
0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; /** * crc16 - compute the CRC-16 for the data buffer * @crc: previous CRC value * @p: data pointer * @len: number of bytes in the buffer * * Returns the updated CRC value. */ u16 crc16(u16 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc16_table[(crc & 0xff) ^ *p++]; return crc; } EXPORT_SYMBOL(crc16); MODULE_DESCRIPTION("CRC16 calculations"); MODULE_LICENSE("GPL");
1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 // SPDX-License-Identifier: GPL-2.0-only /* * NFC Digital Protocol stack * Copyright (c) 2013, Intel Corporation. 
*/ #define pr_fmt(fmt) "digital: %s: " fmt, __func__ #include <linux/module.h> #include "digital.h" #define DIGITAL_PROTO_NFCA_RF_TECH \ (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK) #define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK #define DIGITAL_PROTO_NFCF_RF_TECH \ (NFC_PROTO_FELICA_MASK | NFC_PROTO_NFC_DEP_MASK) #define DIGITAL_PROTO_ISO15693_RF_TECH NFC_PROTO_ISO15693_MASK /* Delay between each poll frame (ms) */ #define DIGITAL_POLL_INTERVAL 10 struct digital_cmd { struct list_head queue; u8 type; u8 pending; u16 timeout; struct sk_buff *req; struct sk_buff *resp; struct digital_tg_mdaa_params *mdaa_params; nfc_digital_cmd_complete_t cmd_cb; void *cb_context; }; struct sk_buff *digital_skb_alloc(struct nfc_digital_dev *ddev, unsigned int len) { struct sk_buff *skb; skb = alloc_skb(len + ddev->tx_headroom + ddev->tx_tailroom, GFP_KERNEL); if (skb) skb_reserve(skb, ddev->tx_headroom); return skb; } void digital_skb_add_crc(struct sk_buff *skb, crc_func_t crc_func, u16 init, u8 bitwise_inv, u8 msb_first) { u16 crc; crc = crc_func(init, skb->data, skb->len); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __fswab16(crc); skb_put_u8(skb, crc & 0xFF); skb_put_u8(skb, (crc >> 8) & 0xFF); } int digital_skb_check_crc(struct sk_buff *skb, crc_func_t crc_func, u16 crc_init, u8 bitwise_inv, u8 msb_first) { int rc; u16 crc; if (skb->len <= 2) return -EIO; crc = crc_func(crc_init, skb->data, skb->len - 2); if (bitwise_inv) crc = ~crc; if (msb_first) crc = __swab16(crc); rc = (skb->data[skb->len - 2] - (crc & 0xFF)) + (skb->data[skb->len - 1] - ((crc >> 8) & 0xFF)); if (rc) return -EIO; skb_trim(skb, skb->len - 2); return 0; } static inline void digital_switch_rf(struct nfc_digital_dev *ddev, bool on) { ddev->ops->switch_rf(ddev, on); } static inline void digital_abort_cmd(struct nfc_digital_dev *ddev) { ddev->ops->abort_cmd(ddev); } static void digital_wq_cmd_complete(struct work_struct *work) { struct 
digital_cmd *cmd; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_complete_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd) { mutex_unlock(&ddev->cmd_lock); return; } list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); if (!IS_ERR(cmd->resp)) print_hex_dump_debug("DIGITAL RX: ", DUMP_PREFIX_NONE, 16, 1, cmd->resp->data, cmd->resp->len, false); cmd->cmd_cb(ddev, cmd->cb_context, cmd->resp); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } static void digital_send_cmd_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_cmd *cmd = arg; cmd->resp = resp; schedule_work(&ddev->cmd_complete_work); } static void digital_wq_cmd(struct work_struct *work) { int rc; struct digital_cmd *cmd; struct digital_tg_mdaa_params *params; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, cmd_work); mutex_lock(&ddev->cmd_lock); cmd = list_first_entry_or_null(&ddev->cmd_queue, struct digital_cmd, queue); if (!cmd || cmd->pending) { mutex_unlock(&ddev->cmd_lock); return; } cmd->pending = 1; mutex_unlock(&ddev->cmd_lock); if (cmd->req) print_hex_dump_debug("DIGITAL TX: ", DUMP_PREFIX_NONE, 16, 1, cmd->req->data, cmd->req->len, false); switch (cmd->type) { case DIGITAL_CMD_IN_SEND: rc = ddev->ops->in_send_cmd(ddev, cmd->req, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_SEND: rc = ddev->ops->tg_send_cmd(ddev, cmd->req, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN: rc = ddev->ops->tg_listen(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MDAA: params = cmd->mdaa_params; rc = ddev->ops->tg_listen_mdaa(ddev, params, cmd->timeout, digital_send_cmd_complete, cmd); break; case DIGITAL_CMD_TG_LISTEN_MD: rc = ddev->ops->tg_listen_md(ddev, cmd->timeout, digital_send_cmd_complete, cmd); break; default: 
pr_err("Unknown cmd type %d\n", cmd->type); return; } if (!rc) return; pr_err("in_send_command returned err %d\n", rc); mutex_lock(&ddev->cmd_lock); list_del(&cmd->queue); mutex_unlock(&ddev->cmd_lock); kfree_skb(cmd->req); kfree(cmd->mdaa_params); kfree(cmd); schedule_work(&ddev->cmd_work); } int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type, struct sk_buff *skb, struct digital_tg_mdaa_params *params, u16 timeout, nfc_digital_cmd_complete_t cmd_cb, void *cb_context) { struct digital_cmd *cmd; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->type = cmd_type; cmd->timeout = timeout; cmd->req = skb; cmd->mdaa_params = params; cmd->cmd_cb = cmd_cb; cmd->cb_context = cb_context; INIT_LIST_HEAD(&cmd->queue); mutex_lock(&ddev->cmd_lock); list_add_tail(&cmd->queue, &ddev->cmd_queue); mutex_unlock(&ddev->cmd_lock); schedule_work(&ddev->cmd_work); return 0; } int digital_in_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->in_configure_hw(ddev, type, param); if (rc) pr_err("in_configure_hw failed: %d\n", rc); return rc; } int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) { int rc; rc = ddev->ops->tg_configure_hw(ddev, type, param); if (rc) pr_err("tg_configure_hw failed: %d\n", rc); return rc; } static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; params->sens_res = DIGITAL_SENS_RES_NFC_DEP; get_random_bytes(params->nfcid1, sizeof(params->nfcid1)); params->sel_res = DIGITAL_SEL_RES_NFC_DEP; params->nfcid2[0] = DIGITAL_SENSF_NFCID2_NFC_DEP_B1; params->nfcid2[1] = DIGITAL_SENSF_NFCID2_NFC_DEP_B2; get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, 500, digital_tg_recv_atr_req, NULL); if (rc) kfree(params); 
return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) { return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MD, NULL, NULL, 500, digital_tg_recv_md_req, NULL); } int digital_target_found(struct nfc_digital_dev *ddev, struct nfc_target *target, u8 protocol) { int rc; u8 framing; u8 rf_tech; u8 poll_tech_count; int (*check_crc)(struct sk_buff *skb); void (*add_crc)(struct sk_buff *skb); rf_tech = ddev->poll_techs[ddev->poll_tech_index].rf_tech; switch (protocol) { case NFC_PROTO_JEWEL: framing = NFC_DIGITAL_FRAMING_NFCA_T1T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_MIFARE: framing = NFC_DIGITAL_FRAMING_NFCA_T2T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_FELICA: framing = NFC_DIGITAL_FRAMING_NFCF_T3T; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; break; case NFC_PROTO_NFC_DEP: if (rf_tech == NFC_DIGITAL_RF_TECH_106A) { framing = NFC_DIGITAL_FRAMING_NFCA_NFC_DEP; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; } else { framing = NFC_DIGITAL_FRAMING_NFCF_NFC_DEP; check_crc = digital_skb_check_crc_f; add_crc = digital_skb_add_crc_f; } break; case NFC_PROTO_ISO15693: framing = NFC_DIGITAL_FRAMING_ISO15693_T5T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; case NFC_PROTO_ISO14443: framing = NFC_DIGITAL_FRAMING_NFCA_T4T; check_crc = digital_skb_check_crc_a; add_crc = digital_skb_add_crc_a; break; case NFC_PROTO_ISO14443_B: framing = NFC_DIGITAL_FRAMING_NFCB_T4T; check_crc = digital_skb_check_crc_b; add_crc = digital_skb_add_crc_b; break; default: pr_err("Invalid protocol %d\n", protocol); return -EINVAL; } pr_debug("rf_tech=%d, protocol=%d\n", rf_tech, protocol); ddev->curr_rf_tech = rf_tech; if (DIGITAL_DRV_CAPS_IN_CRC(ddev)) { ddev->skb_add_crc = digital_skb_add_crc_none; ddev->skb_check_crc = digital_skb_check_crc_none; } else { ddev->skb_add_crc = add_crc; 
ddev->skb_check_crc = check_crc; } rc = digital_in_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING, framing); if (rc) return rc; target->supported_protocols = (1 << protocol); poll_tech_count = ddev->poll_tech_count; ddev->poll_tech_count = 0; rc = nfc_targets_found(ddev->nfc_dev, target, 1); if (rc) { ddev->poll_tech_count = poll_tech_count; return rc; } return 0; } void digital_poll_next_tech(struct nfc_digital_dev *ddev) { u8 rand_mod; digital_switch_rf(ddev, 0); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } get_random_bytes(&rand_mod, sizeof(rand_mod)); ddev->poll_tech_index = rand_mod % ddev->poll_tech_count; mutex_unlock(&ddev->poll_lock); schedule_delayed_work(&ddev->poll_work, msecs_to_jiffies(DIGITAL_POLL_INTERVAL)); } static void digital_wq_poll(struct work_struct *work) { int rc; struct digital_poll_tech *poll_tech; struct nfc_digital_dev *ddev = container_of(work, struct nfc_digital_dev, poll_work.work); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { mutex_unlock(&ddev->poll_lock); return; } poll_tech = &ddev->poll_techs[ddev->poll_tech_index]; mutex_unlock(&ddev->poll_lock); rc = poll_tech->poll_func(ddev, poll_tech->rf_tech); if (rc) digital_poll_next_tech(ddev); } static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, digital_poll_t poll_func) { struct digital_poll_tech *poll_tech; if (ddev->poll_tech_count >= NFC_DIGITAL_POLL_MODE_COUNT_MAX) return; poll_tech = &ddev->poll_techs[ddev->poll_tech_count++]; poll_tech->rf_tech = rf_tech; poll_tech->poll_func = poll_func; } /** * digital_start_poll - start_poll operation * @nfc_dev: device to be polled * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling * * For every supported protocol, the corresponding polling function is added * to the table of polling technologies (ddev->poll_techs[]) using * digital_add_poll_tech(). 
* When a polling function fails (by timeout or protocol error) the next one is * schedule by digital_poll_next_tech() on the poll workqueue (ddev->poll_work). */ static int digital_start_poll(struct nfc_dev *nfc_dev, __u32 im_protocols, __u32 tm_protocols) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); u32 matching_im_protocols, matching_tm_protocols; pr_debug("protocols: im 0x%x, tm 0x%x, supported 0x%x\n", im_protocols, tm_protocols, ddev->protocols); matching_im_protocols = ddev->protocols & im_protocols; matching_tm_protocols = ddev->protocols & tm_protocols; if (!matching_im_protocols && !matching_tm_protocols) { pr_err("Unknown protocol\n"); return -EINVAL; } if (ddev->poll_tech_count) { pr_err("Already polling\n"); return -EBUSY; } if (ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->poll_tech_count = 0; ddev->poll_tech_index = 0; if (matching_im_protocols & DIGITAL_PROTO_NFCA_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_in_send_sens_req); if (matching_im_protocols & DIGITAL_PROTO_NFCB_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106B, digital_in_send_sensb_req); if (matching_im_protocols & DIGITAL_PROTO_NFCF_RF_TECH) { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_in_send_sensf_req); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_424F, digital_in_send_sensf_req); } if (matching_im_protocols & DIGITAL_PROTO_ISO15693_RF_TECH) digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_ISO15693, digital_in_send_iso15693_inv_req); if (matching_tm_protocols & NFC_PROTO_NFC_DEP_MASK) { if (ddev->ops->tg_listen_mdaa) { digital_add_poll_tech(ddev, 0, digital_tg_listen_mdaa); } else if (ddev->ops->tg_listen_md) { digital_add_poll_tech(ddev, 0, digital_tg_listen_md); } else { digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_106A, digital_tg_listen_nfca); digital_add_poll_tech(ddev, NFC_DIGITAL_RF_TECH_212F, digital_tg_listen_nfcf); digital_add_poll_tech(ddev, 
NFC_DIGITAL_RF_TECH_424F, digital_tg_listen_nfcf); } } if (!ddev->poll_tech_count) { pr_err("Unsupported protocols: im=0x%x, tm=0x%x\n", matching_im_protocols, matching_tm_protocols); return -EINVAL; } schedule_delayed_work(&ddev->poll_work, 0); return 0; } static void digital_stop_poll(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); mutex_lock(&ddev->poll_lock); if (!ddev->poll_tech_count) { pr_err("Polling operation was not running\n"); mutex_unlock(&ddev->poll_lock); return; } ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); digital_abort_cmd(ddev); } static int digital_dev_up(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 1); return 0; } static int digital_dev_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_switch_rf(ddev, 0); return 0; } static int digital_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, __u8 comm_mode, __u8 *gb, size_t gb_len) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); int rc; rc = digital_in_send_atr_req(ddev, target, comm_mode, gb, gb_len); if (!rc) ddev->curr_protocol = NFC_PROTO_NFC_DEP; return rc; } static int digital_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); digital_abort_cmd(ddev); ddev->curr_protocol = 0; return 0; } static int digital_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, __u32 protocol) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); if (ddev->poll_tech_count) { pr_err("Can't activate a target while polling\n"); return -EBUSY; } if (ddev->curr_protocol) { pr_err("A target is already active\n"); return -EBUSY; } ddev->curr_protocol = protocol; return 0; } static void digital_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 mode) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); 
if (!ddev->curr_protocol) { pr_err("No active target\n"); return; } digital_abort_cmd(ddev); ddev->curr_protocol = 0; } static int digital_tg_send(struct nfc_dev *dev, struct sk_buff *skb) { struct nfc_digital_dev *ddev = nfc_get_drvdata(dev); return digital_tg_send_dep_res(ddev, skb); } static void digital_in_send_complete(struct nfc_digital_dev *ddev, void *arg, struct sk_buff *resp) { struct digital_data_exch *data_exch = arg; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); resp = NULL; goto done; } if (ddev->curr_protocol == NFC_PROTO_MIFARE) { rc = digital_in_recv_mifare_res(resp); /* crc check is done in digital_in_recv_mifare_res() */ goto done; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_pull_sod(ddev, resp); if (rc) goto done; } rc = ddev->skb_check_crc(resp); done: if (rc) { kfree_skb(resp); resp = NULL; } data_exch->cb(data_exch->cb_context, resp, rc); kfree(data_exch); } static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); struct digital_data_exch *data_exch; int rc; data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL); if (!data_exch) return -ENOMEM; data_exch->cb = cb; data_exch->cb_context = cb_context; if (ddev->curr_protocol == NFC_PROTO_NFC_DEP) { rc = digital_in_send_dep_req(ddev, target, skb, data_exch); goto exit; } if ((ddev->curr_protocol == NFC_PROTO_ISO14443) || (ddev->curr_protocol == NFC_PROTO_ISO14443_B)) { rc = digital_in_iso_dep_push_sod(ddev, skb); if (rc) goto exit; } ddev->skb_add_crc(skb); rc = digital_in_send_cmd(ddev, skb, 500, digital_in_send_complete, data_exch); exit: if (rc) kfree(data_exch); return rc; } static const struct nfc_ops digital_nfc_ops = { .dev_up = digital_dev_up, .dev_down = digital_dev_down, .start_poll = digital_start_poll, .stop_poll = digital_stop_poll, .dep_link_up = 
digital_dep_link_up, .dep_link_down = digital_dep_link_down, .activate_target = digital_activate_target, .deactivate_target = digital_deactivate_target, .tm_send = digital_tg_send, .im_transceive = digital_in_send, }; struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops, __u32 supported_protocols, __u32 driver_capabilities, int tx_headroom, int tx_tailroom) { struct nfc_digital_dev *ddev; if (!ops->in_configure_hw || !ops->in_send_cmd || !ops->tg_listen || !ops->tg_configure_hw || !ops->tg_send_cmd || !ops->abort_cmd || !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech)) return NULL; ddev = kzalloc(sizeof(*ddev), GFP_KERNEL); if (!ddev) return NULL; ddev->driver_capabilities = driver_capabilities; ddev->ops = ops; mutex_init(&ddev->cmd_lock); INIT_LIST_HEAD(&ddev->cmd_queue); INIT_WORK(&ddev->cmd_work, digital_wq_cmd); INIT_WORK(&ddev->cmd_complete_work, digital_wq_cmd_complete); mutex_init(&ddev->poll_lock); INIT_DELAYED_WORK(&ddev->poll_work, digital_wq_poll); if (supported_protocols & NFC_PROTO_JEWEL_MASK) ddev->protocols |= NFC_PROTO_JEWEL_MASK; if (supported_protocols & NFC_PROTO_MIFARE_MASK) ddev->protocols |= NFC_PROTO_MIFARE_MASK; if (supported_protocols & NFC_PROTO_FELICA_MASK) ddev->protocols |= NFC_PROTO_FELICA_MASK; if (supported_protocols & NFC_PROTO_NFC_DEP_MASK) ddev->protocols |= NFC_PROTO_NFC_DEP_MASK; if (supported_protocols & NFC_PROTO_ISO15693_MASK) ddev->protocols |= NFC_PROTO_ISO15693_MASK; if (supported_protocols & NFC_PROTO_ISO14443_MASK) ddev->protocols |= NFC_PROTO_ISO14443_MASK; if (supported_protocols & NFC_PROTO_ISO14443_B_MASK) ddev->protocols |= NFC_PROTO_ISO14443_B_MASK; ddev->tx_headroom = tx_headroom + DIGITAL_MAX_HEADER_LEN; ddev->tx_tailroom = tx_tailroom + DIGITAL_CRC_LEN; ddev->nfc_dev = nfc_allocate_device(&digital_nfc_ops, ddev->protocols, ddev->tx_headroom, ddev->tx_tailroom); if (!ddev->nfc_dev) { pr_err("nfc_allocate_device failed\n"); goto free_dev; } 
nfc_set_drvdata(ddev->nfc_dev, ddev); return ddev; free_dev: kfree(ddev); return NULL; } EXPORT_SYMBOL(nfc_digital_allocate_device); void nfc_digital_free_device(struct nfc_digital_dev *ddev) { nfc_free_device(ddev->nfc_dev); kfree(ddev); } EXPORT_SYMBOL(nfc_digital_free_device); int nfc_digital_register_device(struct nfc_digital_dev *ddev) { return nfc_register_device(ddev->nfc_dev); } EXPORT_SYMBOL(nfc_digital_register_device); void nfc_digital_unregister_device(struct nfc_digital_dev *ddev) { struct digital_cmd *cmd, *n; nfc_unregister_device(ddev->nfc_dev); mutex_lock(&ddev->poll_lock); ddev->poll_tech_count = 0; mutex_unlock(&ddev->poll_lock); cancel_delayed_work_sync(&ddev->poll_work); cancel_work_sync(&ddev->cmd_work); cancel_work_sync(&ddev->cmd_complete_work); list_for_each_entry_safe(cmd, n, &ddev->cmd_queue, queue) { list_del(&cmd->queue); /* Call the command callback if any and pass it a ENODEV error. * This gives a chance to the command issuer to free any * allocated buffer. */ if (cmd->cmd_cb) cmd->cmd_cb(ddev, cmd->cb_context, ERR_PTR(-ENODEV)); kfree(cmd->mdaa_params); kfree(cmd); } } EXPORT_SYMBOL(nfc_digital_unregister_device); MODULE_DESCRIPTION("NFC Digital protocol stack"); MODULE_LICENSE("GPL");
11 11 11 11 3 3 3 3 3 2 2 2 2 3 3 3 3 3 3 2 2 2 2 10 10 10 10 10 10 10 10 9 10 10 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "send.h" #include "main.h" #include <linux/atomic.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/workqueue.h> #include "distributed-arp-table.h" #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" #include "log.h" #include "mesh-interface.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "translation-table.h" static void batadv_send_outstanding_bcast_packet(struct work_struct *work); /** * batadv_send_skb_packet() - send an already prepared packet * @skb: the packet to send * @hard_iface: the interface to use to send the broadcast packet * @dst_addr: the payload destination * * Send out an already prepared packet to the given neighbor or broadcast it * using the specified interface. 
Either hard_iface or neigh_node must be not * NULL. * If neigh_node is NULL, then the packet is broadcasted using hard_iface, * otherwise it is sent as unicast to the given neighbor. * * Regardless of the return value, the skb is consumed. * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const u8 *dst_addr) { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; int ret; bat_priv = netdev_priv(hard_iface->mesh_iface); if (hard_iface->if_status != BATADV_IF_ACTIVE) goto send_skb_err; if (unlikely(!hard_iface->net_dev)) goto send_skb_err; if (!(hard_iface->net_dev->flags & IFF_UP)) { pr_warn("Interface %s is not up - can't send packet via that interface!\n", hard_iface->net_dev->name); goto send_skb_err; } /* push to the ethernet header. */ if (batadv_skb_head_push(skb, ETH_HLEN) < 0) goto send_skb_err; skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); ether_addr_copy(ethhdr->h_source, hard_iface->net_dev->dev_addr); ether_addr_copy(ethhdr->h_dest, dst_addr); ethhdr->h_proto = htons(ETH_P_BATMAN); skb_set_network_header(skb, ETH_HLEN); skb->protocol = htons(ETH_P_BATMAN); skb->dev = hard_iface->net_dev; /* Save a clone of the skb to use when decoding coded packets */ batadv_nc_skb_store_for_decoding(bat_priv, skb); /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. 
*/ ret = dev_queue_xmit(skb); return net_xmit_eval(ret); send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; } /** * batadv_send_broadcast_skb() - Send broadcast packet via hard interface * @skb: packet to be transmitted (with batadv header and no outer eth header) * @hard_iface: outgoing interface * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { static const u8 broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; return batadv_send_skb_packet(skb, hard_iface, broadcast_addr); } /** * batadv_send_unicast_skb() - Send unicast packet to neighbor * @skb: packet to be transmitted (with batadv header and no outer eth header) * @neigh: neighbor which is used as next hop to destination * * Return: A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. */ int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh) { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hardif_neigh_node *hardif_neigh; #endif int ret; ret = batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr); #ifdef CONFIG_BATMAN_ADV_BATMAN_V hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; batadv_hardif_neigh_put(hardif_neigh); #endif return ret; } /** * batadv_send_skb_to_orig() - Lookup next-hop and transmit skb. * @skb: Packet to be transmitted. * @orig_node: Final destination of the packet. * @recv_if: Interface used when receiving the packet (can be NULL). * * Looks up the best next-hop towards the passed originator and passes the * skb on for preparation of MAC header. 
If the packet originated from this * host, NULL can be passed as recv_if and no interface alternating is * attempted. * * Return: negative errno code on a failure, -EINPROGRESS if the skb is * buffered for later transmit or the NET_XMIT status returned by the * lower routine if the packet has been passed down. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; int ret; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) { ret = -EINVAL; goto free_skb; } /* Check if the skb is too large to send in one piece and fragment * it if needed. */ if (atomic_read(&bat_priv->fragmentation) && skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. */ ret = batadv_frag_send_packet(skb, orig_node, neigh_node); /* skb was consumed */ skb = NULL; goto put_neigh_node; } /* try to network code the packet, if it is received on an interface * (i.e. being forwarded). If the packet originates from this node or if * network coding fails, then send the packet as usual. */ if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) ret = -EINPROGRESS; else ret = batadv_send_unicast_skb(skb, neigh_node); /* skb was consumed */ skb = NULL; put_neigh_node: batadv_neigh_node_put(neigh_node); free_skb: kfree_skb(skb); return ret; } /** * batadv_send_skb_push_fill_unicast() - extend the buffer and initialize the * common fields for unicast packets * @skb: the skb carrying the unicast header to initialize * @hdr_size: amount of bytes to push at the beginning of the skb * @orig_node: the destination node * * Return: false if the buffer extension was not possible or true otherwise. 
*/ static bool batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, struct batadv_orig_node *orig_node) { struct batadv_unicast_packet *unicast_packet; u8 ttvn = (u8)atomic_read(&orig_node->last_ttvn); if (batadv_skb_head_push(skb, hdr_size) < 0) return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ ether_addr_copy(unicast_packet->dest, orig_node->orig); /* set the destination tt version number */ unicast_packet->ttvn = ttvn; return true; } /** * batadv_send_skb_prepare_unicast() - encapsulate an skb with a unicast header * @skb: the skb containing the payload to encapsulate * @orig_node: the destination node * * Return: false if the payload could not be encapsulated or true otherwise. */ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, struct batadv_orig_node *orig_node) { size_t uni_size = sizeof(struct batadv_unicast_packet); return batadv_send_skb_push_fill_unicast(skb, uni_size, orig_node); } /** * batadv_send_skb_prepare_unicast_4addr() - encapsulate an skb with a * unicast 4addr header * @bat_priv: the bat priv with all the mesh interface information * @skb: the skb containing the payload to encapsulate * @orig: the destination node * @packet_subtype: the unicast 4addr packet subtype to use * * Return: false if the payload could not be encapsulated or true otherwise. */ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node *orig, int packet_subtype) { struct batadv_hard_iface *primary_if; struct batadv_unicast_4addr_packet *uc_4addr_packet; bool ret = false; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; /* Pull the header space and fill the unicast_packet substructure. 
* We can do that because the first member of the uc_4addr_packet * is of type struct unicast_packet */ if (!batadv_send_skb_push_fill_unicast(skb, sizeof(*uc_4addr_packet), orig)) goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; ether_addr_copy(uc_4addr_packet->src, primary_if->net_dev->dev_addr); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; ret = true; out: batadv_hardif_put(primary_if); return ret; } /** * batadv_send_skb_unicast() - encapsulate and send an skb via unicast * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) * @orig_node: the originator to send the packet to * @vid: the vid to be used to search the translation table * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, int packet_subtype, struct batadv_orig_node *orig_node, unsigned short vid) { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; int ret = NET_XMIT_DROP; if (!orig_node) goto out; switch (packet_type) { case BATADV_UNICAST: if (!batadv_send_skb_prepare_unicast(skb, orig_node)) goto out; break; case BATADV_UNICAST_4ADDR: if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node, packet_subtype)) goto out; break; default: /* this function supports UNICAST and UNICAST_4ADDR only. 
It * should never be invoked with any other packet type */ goto out; } /* skb->data might have been reallocated by * batadv_send_skb_prepare_unicast{,_4addr}() */ ethhdr = eth_hdr(skb); unicast_packet = (struct batadv_unicast_packet *)skb->data; /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will * try to reroute it because the ttvn contained in the header is less * than the current one */ if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; ret = batadv_send_skb_to_orig(skb, orig_node, NULL); /* skb was consumed */ skb = NULL; out: kfree_skb(skb); return ret; } /** * batadv_send_skb_via_tt_generic() - send an skb via TT lookup * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @packet_type: the batman unicast packet type to use * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast * 4addr packets) * @dst_hint: can be used to override the destination contained in the skb * @vid: the vid to be used to search the translation table * * Look up the recipient node for the destination address in the ethernet * header via the translation table. Wrap the given skb into a batman-adv * unicast or unicast-4addr header depending on whether BATADV_UNICAST or * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame * to the according destination node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct sk_buff *skb, int packet_type, int packet_subtype, u8 *dst_hint, unsigned short vid) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; /* if we got an hint! 
let's send the packet to this client (if any) */ if (dst_hint) { src = NULL; dst = dst_hint; } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, packet_subtype, orig_node, vid); batadv_orig_node_put(orig_node); return ret; } /** * batadv_send_skb_via_gw() - send an skb via gateway lookup * @bat_priv: the bat priv with all the mesh interface information * @skb: payload to send * @vid: the vid to be used to search the translation table * * Look up the currently selected gateway. Wrap the given skb into a batman-adv * unicast header and send this frame to this gateway node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, BATADV_P_DATA, orig_node, vid); batadv_orig_node_put(orig_node); return ret; } /** * batadv_forw_packet_free() - free a forwarding packet * @forw_packet: The packet to free * @dropped: whether the packet is freed because is dropped * * This frees a forwarding packet and releases any resources it might * have claimed. 
*/ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, bool dropped) { if (dropped) kfree_skb(forw_packet->skb); else consume_skb(forw_packet->skb); batadv_hardif_put(forw_packet->if_incoming); batadv_hardif_put(forw_packet->if_outgoing); if (forw_packet->queue_left) atomic_inc(forw_packet->queue_left); kfree(forw_packet); } /** * batadv_forw_packet_alloc() - allocate a forwarding packet * @if_incoming: The (optional) if_incoming to be grabbed * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease * @bat_priv: The bat_priv for the mesh of this forw_packet * @skb: The raw packet this forwarding packet shall contain * * Allocates a forwarding packet and tries to get a reference to the * (optional) if_incoming, if_outgoing and queue_left. If queue_left * is NULL then bat_priv is optional, too. * * Return: An allocated forwarding packet on success, NULL otherwise. */ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_forw_packet *forw_packet; const char *qname; if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { qname = "unknown"; if (queue_left == &bat_priv->bcast_queue_left) qname = "bcast"; if (queue_left == &bat_priv->batman_queue_left) qname = "batman"; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s queue is full\n", qname); return NULL; } forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); if (!forw_packet) goto err; if (if_incoming) kref_get(&if_incoming->refcount); if (if_outgoing) kref_get(&if_outgoing->refcount); INIT_HLIST_NODE(&forw_packet->list); INIT_HLIST_NODE(&forw_packet->cleanup_list); forw_packet->skb = skb; forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; forw_packet->num_packets = 1; return forw_packet; err: if (queue_left) 
atomic_inc(queue_left); return NULL; } /** * batadv_forw_packet_was_stolen() - check whether someone stole this packet * @forw_packet: the forwarding packet to check * * This function checks whether the given forwarding packet was claimed by * someone else for free(). * * Return: True if someone stole it, false otherwise. */ static bool batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet) { return !hlist_unhashed(&forw_packet->cleanup_list); } /** * batadv_forw_packet_steal() - claim a forw_packet for free() * @forw_packet: the forwarding packet to steal * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock) * * This function tries to steal a specific forw_packet from global * visibility for the purpose of getting it for free(). That means * the caller is *not* allowed to requeue it afterwards. * * Return: True if stealing was successful. False if someone else stole it * before us. */ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet, spinlock_t *lock) { /* did purging routine steal it earlier? */ spin_lock_bh(lock); if (batadv_forw_packet_was_stolen(forw_packet)) { spin_unlock_bh(lock); return false; } hlist_del_init(&forw_packet->list); /* Just to spot misuse of this function */ hlist_add_fake(&forw_packet->cleanup_list); spin_unlock_bh(lock); return true; } /** * batadv_forw_packet_list_steal() - claim a list of forward packets for free() * @forw_list: the to be stolen forward packets * @cleanup_list: a backup pointer, to be able to dispose the packet later * @hard_iface: the interface to steal forward packets from * * This function claims responsibility to free any forw_packet queued on the * given hard_iface. If hard_iface is NULL forwarding packets on all hard * interfaces will be claimed. * * The packets are being moved from the forw_list to the cleanup_list. This * makes it possible for already running threads to notice the claim. 
*/ static void batadv_forw_packet_list_steal(struct hlist_head *forw_list, struct hlist_head *cleanup_list, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; struct hlist_node *safe_tmp_node; hlist_for_each_entry_safe(forw_packet, safe_tmp_node, forw_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if (hard_iface && forw_packet->if_incoming != hard_iface && forw_packet->if_outgoing != hard_iface) continue; hlist_del(&forw_packet->list); hlist_add_head(&forw_packet->cleanup_list, cleanup_list); } } /** * batadv_forw_packet_list_free() - free a list of forward packets * @head: a list of to be freed forw_packets * * This function cancels the scheduling of any packet in the provided list, * waits for any possibly running packet forwarding thread to finish and * finally, safely frees this forward packet. * * This function might sleep. */ static void batadv_forw_packet_list_free(struct hlist_head *head) { struct batadv_forw_packet *forw_packet; struct hlist_node *safe_tmp_node; hlist_for_each_entry_safe(forw_packet, safe_tmp_node, head, cleanup_list) { cancel_delayed_work_sync(&forw_packet->delayed_work); hlist_del(&forw_packet->cleanup_list); batadv_forw_packet_free(forw_packet, true); } } /** * batadv_forw_packet_queue() - try to queue a forwarding packet * @forw_packet: the forwarding packet to queue * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock) * @head: the shelve to queue it on (e.g. forw_{bat,bcast}_list) * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue a forwarding packet. Requeuing * is prevented if the according interface is shutting down * (e.g. if batadv_forw_packet_list_steal() was called for this * packet earlier). * * Calling batadv_forw_packet_queue() after a call to * batadv_forw_packet_steal() is forbidden! 
* * Caller needs to ensure that forw_packet->delayed_work was initialized. */ static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet, spinlock_t *lock, struct hlist_head *head, unsigned long send_time) { spin_lock_bh(lock); /* did purging routine steal it from us? */ if (batadv_forw_packet_was_stolen(forw_packet)) { /* If you got it for free() without trouble, then * don't get back into the queue after stealing... */ WARN_ONCE(hlist_fake(&forw_packet->cleanup_list), "Requeuing after batadv_forw_packet_steal() not allowed!\n"); spin_unlock_bh(lock); return; } hlist_del_init(&forw_packet->list); hlist_add_head(&forw_packet->list, head); queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work, send_time - jiffies); spin_unlock_bh(lock); } /** * batadv_forw_packet_bcast_queue() - try to queue a broadcast packet * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue a broadcast packet. * * Caller needs to ensure that forw_packet->delayed_work was initialized. */ static void batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bcast_list_lock, &bat_priv->forw_bcast_list, send_time); } /** * batadv_forw_packet_ogmv1_queue() - try to queue an OGMv1 packet * @bat_priv: the bat priv with all the mesh interface information * @forw_packet: the forwarding packet to queue * @send_time: timestamp (jiffies) when the packet is to be sent * * This function tries to (re)queue an OGMv1 packet. * * Caller needs to ensure that forw_packet->delayed_work was initialized. 
*/
void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
				    struct batadv_forw_packet *forw_packet,
				    unsigned long send_time)
{
	spinlock_t *ogm_lock = &bat_priv->forw_bat_list_lock;
	struct hlist_head *ogm_list = &bat_priv->forw_bat_list;

	batadv_forw_packet_queue(forw_packet, ogm_lock, ogm_list, send_time);
}

/**
 * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions
 * @bat_priv: the bat priv with all the mesh interface information
 * @skb: broadcast packet to add
 * @delay: number of jiffies to wait before sending
 * @own_packet: true if it is a self-generated broadcast packet
 * @if_in: the interface where the packet was received on
 * @if_out: the outgoing interface to queue on
 *
 * Wraps a clone of @skb into a forwarding packet accounted against the
 * broadcast queue counter and schedules it on the broadcast list. A @delay
 * of zero is replaced by the jiffies equivalent of 5 milliseconds.
 *
 * This call clones the given skb, hence the caller needs to take into
 * account that the data segment of the original skb might not be
 * modifiable anymore.
 *
 * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
 */
static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
					    struct sk_buff *skb,
					    unsigned long delay,
					    bool own_packet,
					    struct batadv_hard_iface *if_in,
					    struct batadv_hard_iface *if_out)
{
	unsigned long send_time = jiffies;
	struct batadv_forw_packet *pkt;
	struct sk_buff *clone;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return NETDEV_TX_BUSY;

	pkt = batadv_forw_packet_alloc(if_in, if_out,
				       &bat_priv->bcast_queue_left,
				       bat_priv, clone);
	if (!pkt) {
		/* the clone never made it into a forwarding packet */
		kfree_skb(clone);
		return NETDEV_TX_BUSY;
	}

	pkt->own = own_packet;

	INIT_DELAYED_WORK(&pkt->delayed_work,
			  batadv_send_outstanding_bcast_packet);

	/* fall back to a short default wait when no delay was requested */
	if (delay)
		send_time += delay;
	else
		send_time += msecs_to_jiffies(5);

	batadv_forw_packet_bcast_queue(bat_priv, pkt, send_time);

	return NETDEV_TX_OK;
}

/**
 * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet
 * @bat_priv: the bat priv with all the mesh interface information
 * @skb: broadcast packet to add
 * @delay: number of jiffies to wait before sending
 * @own_packet: true if it is a self-generated broadcast packet
 * @if_in: the interface where the packet was received on
 * @if_out: the outgoing interface to forward to
 *
 * Without a delay, a clone of the packet is sent on @if_out right away and
 * counts as one of the interface's num_bcasts transmissions. Any remaining
 * transmissions are handed to the broadcast queue.
 *
 * This call clones the given skb, hence the caller needs to take into
 * account that the data segment of the original skb might not be
 * modifiable anymore.
 *
 * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
 */
static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
				       struct sk_buff *skb,
				       unsigned long delay,
				       bool own_packet,
				       struct batadv_hard_iface *if_in,
				       struct batadv_hard_iface *if_out)
{
	unsigned int remaining = if_out->num_bcasts;
	struct sk_buff *clone;

	if (!delay) {
		clone = skb_clone(skb, GFP_ATOMIC);
		if (!clone)
			return NETDEV_TX_BUSY;

		batadv_send_broadcast_skb(clone, if_out);
		remaining--;
	}

	/* nothing left to send later: neither delayed nor rebroadcasts */
	if (!remaining)
		return NETDEV_TX_OK;

	BATADV_SKB_CB(skb)->num_bcasts = remaining;

	return batadv_forw_bcast_packet_to_list(bat_priv, skb, delay,
						own_packet, if_in, if_out);
}

/**
 * batadv_send_no_broadcast() - check whether (re)broadcast is necessary
 * @bat_priv: the bat priv with all the mesh interface information
 * @skb: broadcast packet to check
 * @own_packet: true if it is a self-generated broadcast packet
 * @if_out: the outgoing interface checked and considered for (re)broadcast
 *
 * Return: False if a packet needs to be (re)broadcasted on the given interface,
 * true otherwise.
 */
static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv,
				     struct sk_buff *skb, bool own_packet,
				     struct batadv_hard_iface *if_out)
{
	struct batadv_hardif_neigh_node *neigh_node = NULL;
	struct batadv_bcast_packet *bcast_packet;
	u8 *orig_neigh;
	u8 *neigh_addr;
	char *type;
	int ret;

	if (!own_packet) {
		neigh_addr = eth_hdr(skb)->h_source;
		neigh_node = batadv_hardif_neigh_get(if_out, neigh_addr);
	}

	bcast_packet = (struct batadv_bcast_packet *)skb->data;
	orig_neigh = neigh_node ?
neigh_node->orig : NULL; ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig, orig_neigh); batadv_hardif_neigh_put(neigh_node); /* ok, may broadcast */ if (!ret) return false; /* no broadcast */ switch (ret) { case BATADV_HARDIF_BCAST_NORECIPIENT: type = "no neighbor"; break; case BATADV_HARDIF_BCAST_DUPFWD: type = "single neighbor is source"; break; case BATADV_HARDIF_BCAST_DUPORIG: type = "single neighbor is originator"; break; default: type = "unknown"; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n", bcast_packet->orig, if_out->net_dev->name, type); return true; } /** * __batadv_forw_bcast_packet() - forward and queue a broadcast packet * @bat_priv: the bat priv with all the mesh interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending * @own_packet: true if it is a self-generated broadcast packet * * Transmits a broadcast packet either immediately or if a delay is given * after that. Furthermore, queues additional retransmissions on wireless * interfaces. * * This call clones the given skb, hence the caller needs to take into * account that the data segment of the given skb might not be * modifiable anymore. * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned long delay, bool own_packet) { struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *prim