Total coverage: 286104 (16%)of 1809986
76 494 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 // SPDX-License-Identifier: GPL-2.0 /* File: fs/ext4/xattr.h On-disk format of extended attributes for the ext4 filesystem. (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #include <linux/xattr.h> /* Magic value in attribute blocks */ #define EXT4_XATTR_MAGIC 0xEA020000 /* Maximum number of references to one attribute block */ #define EXT4_XATTR_REFCOUNT_MAX 1024 /* Name indexes */ #define EXT4_XATTR_INDEX_USER 1 #define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2 #define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3 #define EXT4_XATTR_INDEX_TRUSTED 4 #define EXT4_XATTR_INDEX_LUSTRE 5 #define EXT4_XATTR_INDEX_SECURITY 6 #define EXT4_XATTR_INDEX_SYSTEM 7 #define EXT4_XATTR_INDEX_RICHACL 8 #define EXT4_XATTR_INDEX_ENCRYPTION 9 #define EXT4_XATTR_INDEX_HURD 10 /* Reserved for Hurd */ struct ext4_xattr_header { __le32 h_magic; /* magic number for identification */ __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ __le32 h_hash; /* hash value of all attributes */ __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ /* id = inum if refcount=1, blknum otherwise */ __u32 h_reserved[3]; /* zero right now */ }; struct ext4_xattr_ibody_header { __le32 h_magic; /* magic number for identification */ }; struct ext4_xattr_entry { __u8 e_name_len; /* length of name */ __u8 e_name_index; /* attribute name index */ __le16 e_value_offs; /* offset in disk block of value */ __le32 e_value_inum; /* inode in which the value is stored */ __le32 e_value_size; /* size of attribute value */ __le32 e_hash; /* hash value of name and value */ char e_name[]; /* attribute name */ }; #define EXT4_XATTR_PAD_BITS 2 #define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS) #define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1) #define EXT4_XATTR_LEN(name_len) \ (((name_len) + EXT4_XATTR_ROUND + \ sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) #define EXT4_XATTR_NEXT(entry) \ ((struct ext4_xattr_entry *)( \ (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) #define IHDR(inode, raw_inode) \ ((struct ext4_xattr_ibody_header *) \ ((void *)raw_inode + \ EXT4_GOOD_OLD_INODE_SIZE + \ EXT4_I(inode)->i_extra_isize)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) /* * XATTR_SIZE_MAX is currently 64k, but for the purposes of checking * for file system consistency errors, we use a somewhat bigger value. * This allows XATTR_SIZE_MAX to grow in the future, but by using this * instead of INT_MAX for certain consistency checks, we don't need to * worry about arithmetic overflows. (Actually XATTR_SIZE_MAX is * defined in include/uapi/linux/limits.h, so changing it is going * not going to be trivial....) */ #define EXT4_XATTR_SIZE_MAX (1 << 24) /* * The minimum size of EA value when you start storing it in an external inode * size of block - size of header - size of 1 entry - 4 null bytes */ #define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4) #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) #define EXT4_ZERO_XATTR_VALUE ((void *)-1) /* * If we want to add an xattr to the inode, we should make sure that * i_extra_isize is not 0 and that the inode size is not less than * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data * |--------------------------|------------|------|---------|---|-------| */ #define EXT4_INODE_HAS_XATTR_SPACE(inode) \ ((EXT4_I(inode)->i_extra_isize != 0) && \ (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \ sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \ EXT4_INODE_SIZE((inode)->i_sb))) struct ext4_xattr_info { const char *name; const void *value; size_t value_len; int name_index; int in_inode; }; struct ext4_xattr_search { struct ext4_xattr_entry *first; void *base; void *end; struct ext4_xattr_entry *here; int not_found; }; struct ext4_xattr_ibody_find { struct ext4_xattr_search s; struct ext4_iloc iloc; }; struct ext4_xattr_inode_array { unsigned int count; /* # of used items in the array */ struct inode *inodes[]; }; extern const struct xattr_handler ext4_xattr_user_handler; extern const struct xattr_handler ext4_xattr_trusted_handler; extern const struct xattr_handler ext4_xattr_security_handler; extern const struct xattr_handler ext4_xattr_hurd_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" /* * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. * The first is to signal that there the inline xattrs and data are * taking up so much space that we might as well not keep trying to * expand it. The second is that xattr_sem is taken for writing, so * we shouldn't try to recurse into the inode expansion. For this * second case, we need to make sure that we take save and restore the * NO_EXPAND state flag appropriately. */ static inline void ext4_write_lock_xattr(struct inode *inode, int *save) { down_write(&EXT4_I(inode)->xattr_sem); *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); } static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) { if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) return 0; *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); return 1; } static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) { if (*save == 0) ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); } extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len, bool is_create, int *credits); extern int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, struct buffer_head *block_bh, size_t value_len, bool is_create); extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, struct ext4_xattr_inode_array **array, int extra_credits); extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle); extern void ext4_evict_ea_inode(struct inode *inode); extern const struct xattr_handler * const ext4_xattr_handlers[]; extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size); extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is); extern struct mb_cache *ext4_xattr_create_cache(void); extern void ext4_xattr_destroy_cache(struct mb_cache *); #ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr); #else static inline int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir, const struct qstr *qstr) { return 0; } #endif #ifdef CONFIG_LOCKDEP extern void ext4_xattr_inode_set_class(struct inode *ea_inode); #else static inline void ext4_xattr_inode_set_class(struct inode *ea_inode) { } #endif extern int ext4_get_inode_usage(struct inode *inode, qsize_t *usage);
11 2 2 1 6 6 5 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 // SPDX-License-Identifier: GPL-2.0-only /* * ebt_ip6 * * Authors: * Manohar Castelino <manohar.r.castelino@intel.com> * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> * Jan Engelhardt <jengelh@medozas.de> * * Summary: * This is just a modification of the IPv4 code written by * Bart De Schuymer <bdschuym@pandora.be> * with the changes required to support IPv6 * * Jan, 2008 */ #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/in.h> #include <linux/module.h> #include <net/dsfield.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ip6.h> union pkthdr { struct { __be16 src; __be16 dst; } tcpudphdr; struct { u8 type; u8 code; } icmphdr; }; static bool ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; const union pkthdr *pptr; union pkthdr _pkthdr; ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) return false; if ((info->bitmask & EBT_IP6_TCLASS) && NF_INVF(info, EBT_IP6_TCLASS, info->tclass != ipv6_get_dsfield(ih6))) return false; if (((info->bitmask & EBT_IP6_SOURCE) && NF_INVF(info, EBT_IP6_SOURCE, ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, &info->saddr))) || ((info->bitmask & EBT_IP6_DEST) && NF_INVF(info, EBT_IP6_DEST, ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, &info->daddr)))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; __be16 frag_off; int offset_ph; offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); if (offset_ph == -1) return false; if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr)) return false; if (!(info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), &_pkthdr); if (pptr == NULL) return false; if (info->bitmask & EBT_IP6_DPORT) { u16 dst = ntohs(pptr->tcpudphdr.dst); if (NF_INVF(info, EBT_IP6_DPORT, dst < info->dport[0] || dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP6_SPORT) { u16 src = ntohs(pptr->tcpudphdr.src); if (NF_INVF(info, EBT_IP6_SPORT, src < info->sport[0] || src > info->sport[1])) return false; } if ((info->bitmask & EBT_IP6_ICMP6) && NF_INVF(info, EBT_IP6_ICMP6, pptr->icmphdr.type < info->icmpv6_type[0] || pptr->icmphdr.type > info->icmpv6_type[1] || pptr->icmphdr.code < info->icmpv6_code[0] || pptr->icmphdr.code > info->icmpv6_code[1])) return false; } return true; } static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) { const struct ebt_entry *e = par->entryinfo; struct ebt_ip6_info *info = par->matchinfo; if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) return -EINVAL; if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) return -EINVAL; if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { if (info->invflags & EBT_IP6_PROTO) return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) return -EINVAL; } if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) return -EINVAL; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) return -EINVAL; if (info->bitmask & EBT_IP6_ICMP6) { if ((info->invflags & EBT_IP6_PROTO) || info->protocol != IPPROTO_ICMPV6) return -EINVAL; if (info->icmpv6_type[0] > info->icmpv6_type[1] || info->icmpv6_code[0] > info->icmpv6_code[1]) return -EINVAL; } return 0; } static struct xt_match ebt_ip6_mt_reg __read_mostly = { .name = "ip6", .revision = 0, .family = NFPROTO_BRIDGE, .match = ebt_ip6_mt, .checkentry = ebt_ip6_mt_check, .matchsize = sizeof(struct ebt_ip6_info), .me = THIS_MODULE, }; static int __init ebt_ip6_init(void) { return xt_register_match(&ebt_ip6_mt_reg); } static void __exit ebt_ip6_fini(void) { xt_unregister_match(&ebt_ip6_mt_reg); } module_init(ebt_ip6_init); module_exit(ebt_ip6_fini); MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>"); MODULE_LICENSE("GPL");
12 12 12 2 2 93 7 86 94 94 19 19 19 19 19 19 19 19 9 3 3 4 6 6 9 9 3 3 31 5 5 3 22 20 4 89 90 15 85 100 3 3 63 63 63 63 38 38 3 3 19 10 10 31 31 1 7 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 11 11 1 8 2 2 8 8 8 14 14 9 14 18 1 19 19 19 19 19 19 9 14 14 13 6 13 5 14 6 12 13 13 1 12 10 6 13 6 4 1 1 6 6 3 1 3 5 5 5 2 30 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 // SPDX-License-Identifier: GPL-2.0 /* * BlueZ - Bluetooth protocol stack for Linux * * Copyright (C) 2021 Intel Corporation * Copyright 2023 NXP */ #include <linux/property.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> #include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" #include "eir.h" #include "msft.h" #include "aosp.h" #include "leds.h" static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, struct sk_buff *skb) { bt_dev_dbg(hdev, "result 0x%2.2x", result); if (hdev->req_status != HCI_REQ_PEND) return; hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; /* Free the request command so it is not used as response */ kfree_skb(hdev->req_skb); hdev->req_skb = NULL; if (skb) { struct sock *sk = hci_skb_sk(skb); /* Drop sk reference if set */ if (sk) sock_put(sk); hdev->req_rsp = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); } struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, struct sock *sk) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; struct sk_buff *skb; skb = bt_skb_alloc(len, GFP_ATOMIC); if (!skb) return NULL; hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE); hdr->opcode = cpu_to_le16(opcode); hdr->plen = plen; if (plen) skb_put_data(skb, param, plen); bt_dev_dbg(hdev, "skb len %d", skb->len); hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; hci_skb_opcode(skb) = opcode; /* Grab a reference if command needs to be associated with a sock (e.g. * likely mgmt socket that initiated the command). */ if (sk) { hci_skb_sk(skb) = sk; sock_hold(sk); } return skb; } static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen, const void *param, u8 event, struct sock *sk) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); /* If an error occurred during request building, there is no point in * queueing the HCI command. We can simply return. */ if (req->err) return; skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, sk); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); req->err = -ENOMEM; return; } if (skb_queue_empty(&req->cmd_q)) bt_cb(skb)->hci.req_flags |= HCI_REQ_START; hci_skb_event(skb) = event; skb_queue_tail(&req->cmd_q, skb); } static int hci_cmd_sync_run(struct hci_request *req) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; unsigned long flags; bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); /* If an error occurred during request building, remove all HCI * commands queued on the HCI request queue. */ if (req->err) { skb_queue_purge(&req->cmd_q); return req->err; } /* Do not allow empty requests */ if (skb_queue_empty(&req->cmd_q)) return -ENODATA; skb = skb_peek_tail(&req->cmd_q); bt_cb(skb)->hci.req_complete_skb = hci_cmd_sync_complete; bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); queue_work(hdev->workqueue, &hdev->cmd_work); return 0; } static void hci_request_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); req->hdev = hdev; req->err = 0; } /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk) { struct hci_request req; struct sk_buff *skb; int err = 0; bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode); hci_request_init(&req, hdev); hci_cmd_sync_add(&req, opcode, plen, param, event, sk); hdev->req_status = HCI_REQ_PEND; err = hci_cmd_sync_run(&req); if (err < 0) return ERR_PTR(err); err = wait_event_interruptible_timeout(hdev->req_wait_q, hdev->req_status != HCI_REQ_PEND, timeout); if (err == -ERESTARTSYS) return ERR_PTR(-EINTR); switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); break; case HCI_REQ_CANCELED: err = -hdev->req_result; break; default: err = -ETIMEDOUT; break; } hdev->req_status = 0; hdev->req_result = 0; skb = hdev->req_rsp; hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); if (err < 0) { kfree_skb(skb); return ERR_PTR(err); } return skb; } EXPORT_SYMBOL(__hci_cmd_sync_sk); /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { return __hci_cmd_sync_sk(hdev, opcode, plen, param, 0, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync); /* Send HCI command and wait for command complete event */ struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { struct sk_buff *skb; if (!test_bit(HCI_UP, &hdev->flags)) return ERR_PTR(-ENETDOWN); bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); hci_req_sync_lock(hdev); skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); hci_req_sync_unlock(hdev); return skb; } EXPORT_SYMBOL(hci_cmd_sync); /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout) { return __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync_ev); /* This function requires the caller holds hdev->req_lock. */ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk) { struct sk_buff *skb; u8 status; skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { if (!event) bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", opcode, PTR_ERR(skb)); return PTR_ERR(skb); } /* If command return a status event skb will be set to NULL as there are * no parameters, in case of failure IS_ERR(skb) would have be set to * the actual error would be found with PTR_ERR(skb). */ if (!skb) return 0; status = skb->data[0]; kfree_skb(skb); return status; } EXPORT_SYMBOL(__hci_cmd_sync_status_sk); int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { return __hci_cmd_sync_status_sk(hdev, opcode, plen, param, 0, timeout, NULL); } EXPORT_SYMBOL(__hci_cmd_sync_status); int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout) { int err; hci_req_sync_lock(hdev); err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout); hci_req_sync_unlock(hdev); return err; } EXPORT_SYMBOL(hci_cmd_sync_status); static void hci_cmd_sync_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work); bt_dev_dbg(hdev, ""); /* Dequeue all entries and run them */ while (1) { struct hci_cmd_sync_work_entry *entry; mutex_lock(&hdev->cmd_sync_work_lock); entry = list_first_entry_or_null(&hdev->cmd_sync_work_list, struct hci_cmd_sync_work_entry, list); if (entry) list_del(&entry->list); mutex_unlock(&hdev->cmd_sync_work_lock); if (!entry) break; bt_dev_dbg(hdev, "entry %p", entry); if (entry->func) { int err; hci_req_sync_lock(hdev); err = entry->func(hdev, entry->data); if (entry->destroy) entry->destroy(hdev, entry->data, err); hci_req_sync_unlock(hdev); } kfree(entry); } } static void hci_cmd_sync_cancel_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_cancel_work); cancel_delayed_work_sync(&hdev->cmd_timer); cancel_delayed_work_sync(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); wake_up_interruptible(&hdev->req_wait_q); } static int hci_scan_disable_sync(struct hci_dev *hdev); static int scan_disable_sync(struct hci_dev *hdev, void *data) { return hci_scan_disable_sync(hdev); } static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data) { return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN, 0); } static void le_scan_disable(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); int status; bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) goto _return; status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "failed to disable LE scan: %d", status); goto _return; } /* If we were running LE only scan, change discovery state. If * we were running both LE and BR/EDR inquiry simultaneously, * and BR/EDR inquiry is already finished, stop discovery, * otherwise BR/EDR inquiry will stop discovery when finished. * If we will resolve remote device name, do not change * discovery state. */ if (hdev->discovery.type == DISCOV_TYPE_LE) goto discov_stopped; if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) goto _return; if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { if (!test_bit(HCI_INQUIRY, &hdev->flags) && hdev->discovery.state != DISCOVERY_RESOLVING) goto discov_stopped; goto _return; } status = hci_cmd_sync_queue(hdev, interleaved_inquiry_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "inquiry failed: status %d", status); goto discov_stopped; } goto _return; discov_stopped: hci_discovery_set_state(hdev, DISCOVERY_STOPPED); _return: hci_dev_unlock(hdev); } static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup); static int reenable_adv_sync(struct hci_dev *hdev, void *data) { bt_dev_dbg(hdev, ""); if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return 0; if (hdev->cur_adv_instance) { return hci_schedule_adv_instance_sync(hdev, hdev->cur_adv_instance, true); } else { if (ext_adv_capable(hdev)) { hci_start_ext_adv_sync(hdev, 0x00); } else { hci_update_adv_data_sync(hdev, 0x00); hci_update_scan_rsp_data_sync(hdev, 0x00); hci_enable_advertising_sync(hdev); } } return 0; } static void reenable_adv(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, reenable_adv_work); int status; bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); status = hci_cmd_sync_queue(hdev, reenable_adv_sync, NULL, NULL); if (status) bt_dev_err(hdev, "failed to reenable ADV: %d", status); hci_dev_unlock(hdev); } static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { hdev->adv_instance_timeout = 0; cancel_delayed_work(&hdev->adv_instance_expire); } } /* For a single instance: * - force == true: The instance will be removed even when its remaining * lifetime is not zero. * - force == false: the instance will be deactivated but kept stored unless * the remaining lifetime is zero. * * For instance == 0x00: * - force == true: All instances will be removed regardless of their timeout * setting. * - force == false: Only instances that have a timeout will be removed. */ int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; u8 rem_inst; /* Cancel any timeout concerning the removed instance(s). */ if (!instance || hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); /* Get the next instance to advertise BEFORE we remove * the current one. This can be the same instance again * if there is only one instance. */ if (instance && hdev->cur_adv_instance == instance) next_instance = hci_get_next_instance(hdev, instance); if (instance == 0x00) { list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!(force || adv_instance->timeout)) continue; rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); if (force || (adv_instance && adv_instance->timeout && !adv_instance->remaining_time)) { /* Don't advertise a removed instance. */ if (next_instance && next_instance->instance == instance) next_instance = NULL; err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); } } if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return 0; if (next_instance && !ext_adv_capable(hdev)) return hci_schedule_adv_instance_sync(hdev, next_instance->instance, false); return 0; } static int adv_timeout_expire_sync(struct hci_dev *hdev, void *data) { u8 instance = *(u8 *)data; kfree(data); hci_clear_adv_instance_sync(hdev, NULL, instance, false); if (list_empty(&hdev->adv_instances)) return hci_disable_advertising_sync(hdev); return 0; } static void adv_timeout_expire(struct work_struct *work) { u8 *inst_ptr; struct hci_dev *hdev = container_of(work, struct hci_dev, adv_instance_expire.work); bt_dev_dbg(hdev, ""); hci_dev_lock(hdev); hdev->adv_instance_timeout = 0; if (hdev->cur_adv_instance == 0x00) goto unlock; inst_ptr = kmalloc(1, GFP_KERNEL); if (!inst_ptr) goto unlock; *inst_ptr = hdev->cur_adv_instance; hci_cmd_sync_queue(hdev, adv_timeout_expire_sync, inst_ptr, NULL); unlock: hci_dev_unlock(hdev); } static bool is_interleave_scanning(struct hci_dev *hdev) { return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; } static int hci_passive_scan_sync(struct hci_dev *hdev); static void interleave_scan_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, interleave_scan.work); unsigned long timeout; if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); } else { bt_dev_err(hdev, "unexpected error"); return; } hci_passive_scan_sync(hdev); hci_dev_lock(hdev); switch (hdev->interleave_scan_state) { case INTERLEAVE_SCAN_ALLOWLIST: bt_dev_dbg(hdev, "next state: allowlist"); hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; break; case INTERLEAVE_SCAN_NO_FILTER: bt_dev_dbg(hdev, "next state: no filter"); hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; break; case INTERLEAVE_SCAN_NONE: bt_dev_err(hdev, "unexpected error"); } hci_dev_unlock(hdev); /* Don't continue interleaving if it was canceled */ if (is_interleave_scanning(hdev)) queue_delayed_work(hdev->req_workqueue, &hdev->interleave_scan, timeout); } void hci_cmd_sync_init(struct hci_dev *hdev) { INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); INIT_LIST_HEAD(&hdev->cmd_sync_work_list); mutex_init(&hdev->cmd_sync_work_lock); mutex_init(&hdev->unregister_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry, int err) { if (entry->destroy) entry->destroy(hdev, entry->data, err); list_del(&entry->list); kfree(entry); } void hci_cmd_sync_clear(struct hci_dev *hdev) { struct hci_cmd_sync_work_entry *entry, *tmp; cancel_work_sync(&hdev->cmd_sync_work); cancel_work_sync(&hdev->reenable_adv_work); mutex_lock(&hdev->cmd_sync_work_lock); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); mutex_unlock(&hdev->cmd_sync_work_lock); } void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } EXPORT_SYMBOL(hci_cmd_sync_cancel); /* Cancel ongoing command request synchronously: * * - Set result and mark status to HCI_REQ_CANCELED * - Wakeup command sync thread */ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); if (hdev->req_status == HCI_REQ_PEND) { /* req_result is __u32 so error must be positive to be properly * propagated. */ hdev->req_result = err < 0 ? -err : err; hdev->req_status = HCI_REQ_CANCELED; wake_up_interruptible(&hdev->req_wait_q); } } EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); /* Submit HCI command to be run in as cmd_sync_work: * * - hdev must _not_ be unregistered */ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; int err = 0; mutex_lock(&hdev->unregister_lock); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { err = -ENODEV; goto unlock; } entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { err = -ENOMEM; goto unlock; } entry->func = func; entry->data = data; entry->destroy = destroy; mutex_lock(&hdev->cmd_sync_work_lock); list_add_tail(&entry->list, &hdev->cmd_sync_work_list); mutex_unlock(&hdev->cmd_sync_work_lock); queue_work(hdev->req_workqueue, &hdev->cmd_sync_work); unlock: mutex_unlock(&hdev->unregister_lock); return err; } EXPORT_SYMBOL(hci_cmd_sync_submit); /* Queue HCI command: * * - hdev must be running */ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { /* Only queue command if hdev is running which means it had been opened * and is either on init phase or is already up. */ if (!test_bit(HCI_RUNNING, &hdev->flags)) return -ENETDOWN; return hci_cmd_sync_submit(hdev, func, data, destroy); } EXPORT_SYMBOL(hci_cmd_sync_queue); static struct hci_cmd_sync_work_entry * _hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (func && entry->func != func) continue; if (data && entry->data != data) continue; if (destroy && entry->destroy != destroy) continue; return entry; } return NULL; } /* Queue HCI command entry once: * * - Lookup if an entry already exist and only if it doesn't creates a new entry * and queue it. */ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) return 0; return hci_cmd_sync_queue(hdev, func, data, destroy); } EXPORT_SYMBOL(hci_cmd_sync_queue_once); /* Lookup HCI command entry: * * - Return first entry that matches by function callback or data or * destroy callback. */ struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; mutex_lock(&hdev->cmd_sync_work_lock); entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); mutex_unlock(&hdev->cmd_sync_work_lock); return entry; } EXPORT_SYMBOL(hci_cmd_sync_lookup_entry); /* Cancel HCI command entry */ void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry) { mutex_lock(&hdev->cmd_sync_work_lock); _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); mutex_unlock(&hdev->cmd_sync_work_lock); } EXPORT_SYMBOL(hci_cmd_sync_cancel_entry); /* Dequeue one HCI command entry: * * - Lookup and cancel first entry that matches. */ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); if (!entry) return false; hci_cmd_sync_cancel_entry(hdev, entry); return true; } EXPORT_SYMBOL(hci_cmd_sync_dequeue_once); /* Dequeue HCI command entry: * * - Lookup and cancel any entry that matches by function callback or data or * destroy callback. */ bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; bool ret = false; mutex_lock(&hdev->cmd_sync_work_lock); while ((entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy))) { _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); ret = true; } mutex_unlock(&hdev->cmd_sync_work_lock); return ret; } EXPORT_SYMBOL(hci_cmd_sync_dequeue); int hci_update_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; bt_dev_dbg(hdev, ""); if (!hdev_is_powered(hdev)) return 0; if (!lmp_ext_inq_capable(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return 0; memset(&cp, 0, sizeof(cp)); eir_create(hdev, cp.data); if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) return 0; memcpy(hdev->eir, cp.data, sizeof(cp.data)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static u8 get_service_classes(struct hci_dev *hdev) { struct bt_uuid *uuid; u8 val = 0; list_for_each_entry(uuid, &hdev->uuids, list) val |= uuid->svc_hint; return val; } int hci_update_class_sync(struct hci_dev *hdev) { u8 cod[3]; bt_dev_dbg(hdev, ""); if (!hdev_is_powered(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) return 0; cod[0] = hdev->minor_class; cod[1] = hdev->major_class; cod[2] = get_service_classes(hdev); if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) cod[1] |= 0x20; if (memcmp(cod, hdev->dev_class, 3) == 0) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod, HCI_CMD_TIMEOUT); } static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) { /* If there is no connection we are OK to advertise. */ if (hci_conn_num(hdev, LE_LINK) == 0) return true; /* Check le_states if there is any connection in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0) { /* Peripheral connection state and non connectable mode * bit 20. */ if (!connectable && !(hdev->le_states[2] & 0x10)) return false; /* Peripheral connection state and connectable mode bit 38 * and scannable bit 21. */ if (connectable && (!(hdev->le_states[4] & 0x40) || !(hdev->le_states[2] & 0x20))) return false; } /* Check le_states if there is any connection in central role. */ if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) { /* Central connection state and non connectable mode bit 18. */ if (!connectable && !(hdev->le_states[2] & 0x02)) return false; /* Central connection state and connectable mode bit 35 and * scannable 19. */ if (connectable && (!(hdev->le_states[4] & 0x08) || !(hdev->le_states[2] & 0x08))) return false; } return true; } static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) { /* If privacy is not enabled don't use RPA */ if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return false; /* If basic privacy mode is enabled use RPA */ if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) return true; /* If limited privacy mode is enabled don't use RPA if we're * both discoverable and bondable. */ if ((flags & MGMT_ADV_FLAG_DISCOV) && hci_dev_test_flag(hdev, HCI_BONDABLE)) return false; /* We're neither bondable nor discoverable in the limited * privacy mode, therefore use RPA. */ return true; } static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa) { /* If we're advertising or initiating an LE connection we can't * go ahead and change the random address at this time. This is * because the eventual initiator address used for the * subsequently created connection will be undefined (some * controllers use the new address and others the one we had * when the operation started). * * In this kind of scenario skip the update and let the random * address be updated at the next cycle. */ if (hci_dev_test_flag(hdev, HCI_LE_ADV) || hci_lookup_le_connect(hdev)) { bt_dev_dbg(hdev, "Deferring random address update"); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); return 0; } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa, HCI_CMD_TIMEOUT); } int hci_update_random_address_sync(struct hci_dev *hdev, bool require_privacy, bool rpa, u8 *own_addr_type) { int err; /* If privacy is enabled use a resolvable private address. If * current RPA has expired or there is something else than * the current RPA in use, then generate a new one. */ if (rpa) { /* If Controller supports LL Privacy use own address type is * 0x03 */ if (use_ll_privacy(hdev)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; /* Check if RPA is valid */ if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { bt_dev_err(hdev, "failed to generate new RPA"); return err; } err = hci_set_random_addr_sync(hdev, &hdev->rpa); if (err) return err; return 0; } /* In case of required privacy without resolvable private address, * use an non-resolvable private address. This is useful for active * scanning and non-connectable advertising. */ if (require_privacy) { bdaddr_t nrpa; while (true) { /* The non-resolvable private address is generated * from random six bytes with the two most significant * bits cleared. */ get_random_bytes(&nrpa, 6); nrpa.b[5] &= 0x3f; /* The non-resolvable private address shall not be * equal to the public address. */ if (bacmp(&hdev->bdaddr, &nrpa)) break; } *own_addr_type = ADDR_LE_DEV_RANDOM; return hci_set_random_addr_sync(hdev, &nrpa); } /* If forcing static address is in use or there is no public * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. * * In case BR/EDR has been disabled on a dual-mode controller * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || !bacmp(&hdev->bdaddr, BDADDR_ANY) || (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && bacmp(&hdev->static_addr, BDADDR_ANY))) { *own_addr_type = ADDR_LE_DEV_RANDOM; if (bacmp(&hdev->static_addr, &hdev->random_addr)) return hci_set_random_addr_sync(hdev, &hdev->static_addr); return 0; } /* Neither privacy nor static address is being used so use a * public address. */ *own_addr_type = ADDR_LE_DEV_PUBLIC; return 0; } static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_enable *cp; struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; u8 size; struct adv_info *adv = NULL; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; /* If not enabled there is nothing to do */ if (!adv->enabled) return 0; } memset(data, 0, sizeof(data)); cp = (void *)data; set = (void *)cp->data; /* Instance 0x00 indicates all advertising instances will be disabled */ cp->num_of_sets = !!instance; cp->enable = 0x00; set->handle = adv ? adv->handle : instance; size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, size, data, HCI_CMD_TIMEOUT); } static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, bdaddr_t *random_addr) { struct hci_cp_le_set_adv_set_rand_addr cp; int err; if (!instance) { /* Instance 0x00 doesn't have an adv_info, instead it uses * hdev->random_addr to track its address so whenever it needs * to be updated this also set the random address since * hdev->random_addr is shared with scan state machine. */ err = hci_set_random_addr_sync(hdev, random_addr); if (err) return err; } memset(&cp, 0, sizeof(cp)); cp.handle = instance; bacpy(&cp.bdaddr, random_addr); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; bool connectable; u32 flags; bdaddr_t random_addr; u8 own_addr_type; int err; struct adv_info *adv; bool secondary_adv; if (instance > 0) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; } else { adv = NULL; } /* Updating parameters of an active instance will return a * Command Disallowed error, so we must first disable the * instance if it is active. */ if (adv && !adv->pending) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; } flags = hci_adv_instance_flags(hdev, instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. */ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || mgmt_get_connectable(hdev); if (!is_advertising_allowed(hdev, connectable)) return -EPERM; /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a * non-resolvable private address. */ err = hci_get_random_address(hdev, !connectable, adv_use_rpa(hdev, flags), adv, &own_addr_type, &random_addr); if (err < 0) return err; memset(&cp, 0, sizeof(cp)); if (adv) { hci_cpu_to_le24(adv->min_interval, cp.min_interval); hci_cpu_to_le24(adv->max_interval, cp.max_interval); cp.tx_power = adv->tx_power; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); if (connectable) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); } else if (hci_adv_instance_is_scannable(hdev, instance) || (flags & MGMT_ADV_PARAM_SCAN_RSP)) { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND); } else { if (secondary_adv) cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND); else cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND); } /* If Own_Address_Type equals 0x02 or 0x03, the Peer_Address parameter * contains the peer’s Identity Address and the Peer_Address_Type * parameter contains the peer’s Identity Type (i.e., 0x00 or 0x01). * These parameters are used to locate the corresponding local IRK in * the resolving list; this IRK is used to generate their own address * used in the advertisement. */ if (own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) hci_copy_identity_address(hdev, &cp.peer_addr, &cp.peer_addr_type); cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; cp.handle = adv ? adv->handle : instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_2M; } else if (flags & MGMT_ADV_FLAG_SEC_CODED) { cp.primary_phy = HCI_ADV_PHY_CODED; cp.secondary_phy = HCI_ADV_PHY_CODED; } else { /* In all other cases use 1M */ cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_1M; } err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; if ((own_addr_type == ADDR_LE_DEV_RANDOM || own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) && bacmp(&random_addr, BDADDR_ANY)) { /* Check if random address need to be updated */ if (adv) { if (!bacmp(&random_addr, &adv->random_addr)) return 0; } else { if (!bacmp(&random_addr, &hdev->random_addr)) return 0; } return hci_set_adv_set_random_addr_sync(hdev, instance, &random_addr); } return 0; } static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length, HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->scan_rsp_changed) return 0; } len = eir_create_scan_rsp(hdev, instance, pdu->data); pdu->handle = adv ? adv->handle : instance; pdu->length = len; pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; if (adv) { adv->scan_rsp_changed = false; } else { memcpy(hdev->scan_rsp_data, pdu->data, len); hdev->scan_rsp_data_len = len; } return 0; } static int __hci_set_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_scan_rsp_data cp; u8 len; memset(&cp, 0, sizeof(cp)); len = eir_create_scan_rsp(hdev, instance, cp.data); if (hdev->scan_rsp_data_len == len && !memcmp(cp.data, hdev->scan_rsp_data, len)) return 0; memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); hdev->scan_rsp_data_len = len; cp.length = len; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_update_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; if (ext_adv_capable(hdev)) return hci_set_ext_scan_rsp_data_sync(hdev, instance); return __hci_set_scan_rsp_data_sync(hdev, instance); } int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_enable *cp; struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; struct adv_info *adv; if (instance > 0) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; /* If already enabled there is nothing to do */ if (adv->enabled) return 0; } else { adv = NULL; } cp = (void *)data; set = (void *)cp->data; memset(cp, 0, sizeof(*cp)); cp->enable = 0x01; cp->num_of_sets = 0x01; memset(set, 0, sizeof(*set)); set->handle = adv ? adv->handle : instance; /* Set duration per instance since controller is responsible for * scheduling it. */ if (adv && adv->timeout) { u16 duration = adv->timeout * MSEC_PER_SEC; /* Time = N * 10 ms */ set->duration = cpu_to_le16(duration / 10); } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(*cp) + sizeof(*set) * cp->num_of_sets, data, HCI_CMD_TIMEOUT); } int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance) { int err; err = hci_setup_ext_adv_instance_sync(hdev, instance); if (err) return err; err = hci_set_ext_scan_rsp_data_sync(hdev, instance); if (err) return err; return hci_enable_ext_advertising_sync(hdev, instance); } int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_per_adv_enable cp; struct adv_info *adv = NULL; /* If periodic advertising already disabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic || !adv->enabled) return 0; memset(&cp, 0, sizeof(cp)); cp.enable = 0x00; cp.handle = instance; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance, u16 min_interval, u16 max_interval) { struct hci_cp_le_set_per_adv_params cp; memset(&cp, 0, sizeof(cp)); if (!min_interval) min_interval = DISCOV_LE_PER_ADV_INT_MIN; if (!max_interval) max_interval = DISCOV_LE_PER_ADV_INT_MAX; cp.handle = instance; cp.min_interval = cpu_to_le16(min_interval); cp.max_interval = cpu_to_le16(max_interval); cp.periodic_properties = 0x0000; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance) { DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length, HCI_MAX_PER_AD_LENGTH); u8 len; struct adv_info *adv = NULL; if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic) return 0; } len = eir_create_per_adv_data(hdev, instance, pdu->data); pdu->length = len; pdu->handle = adv ? adv->handle : instance; pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA, struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); } static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_per_adv_enable cp; struct adv_info *adv = NULL; /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); if (adv && adv->periodic && adv->enabled) return 0; memset(&cp, 0, sizeof(cp)); cp.enable = 0x01; cp.handle = instance; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Checks if periodic advertising data contains a Basic Announcement and if it * does generates a Broadcast ID and add Broadcast Announcement. */ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; u8 ad[4 + 3]; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement * as it can be overwritten at any point. */ if (!adv) return 0; /* Check if PA data doesn't contains a Basic Audio Announcement then * there is nothing to do. */ if (!eir_get_service_data(adv->per_adv_data, adv->per_adv_data_len, 0x1851, NULL)) return 0; /* Check if advertising data already has a Broadcast Announcement since * the process may want to control the Broadcast ID directly and in that * case the kernel shall no interfere. */ if (eir_get_service_data(adv->adv_data, adv->adv_data_len, 0x1852, NULL)) return 0; /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval) { struct adv_info *adv = NULL; int err; bool added = false; hci_disable_per_advertising_sync(hdev, instance); if (instance) { adv = hci_find_adv_instance(hdev, instance); /* Create an instance if that could not be found */ if (!adv) { adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, sync_interval); if (IS_ERR(adv)) return PTR_ERR(adv); adv->pending = false; added = true; } } /* Start advertising */ err = hci_start_ext_adv_sync(hdev, instance); if (err < 0) goto fail; err = hci_adv_bcast_annoucement(hdev, adv); if (err < 0) goto fail; err = hci_set_per_adv_params_sync(hdev, instance, min_interval, max_interval); if (err < 0) goto fail; err = hci_set_per_adv_data_sync(hdev, instance); if (err < 0) goto fail; err = hci_enable_per_advertising_sync(hdev, instance); if (err < 0) goto fail; return 0; fail: if (added) hci_remove_adv_instance(hdev, instance); return err; } static int hci_start_adv_sync(struct hci_dev *hdev, u8 instance) { int err; if (ext_adv_capable(hdev)) return hci_start_ext_adv_sync(hdev, instance); err = hci_update_adv_data_sync(hdev, instance); if (err) return err; err = hci_update_scan_rsp_data_sync(hdev, instance); if (err) return err; return hci_enable_advertising_sync(hdev); } int hci_enable_advertising_sync(struct hci_dev *hdev) { struct adv_info *adv_instance; struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; u16 adv_min_interval, adv_max_interval; u32 flags; u8 status; if (ext_adv_capable(hdev)) return hci_enable_ext_advertising_sync(hdev, hdev->cur_adv_instance); flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance); adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. */ connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || mgmt_get_connectable(hdev); if (!is_advertising_allowed(hdev, connectable)) return -EINVAL; status = hci_disable_advertising_sync(hdev); if (status) return status; /* Clear the HCI_LE_ADV bit temporarily so that the * hci_update_random_address knows that it's safe to go ahead * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to true only when non-connectable * advertising is used. In that case it is fine to use a * non-resolvable private address. */ status = hci_update_random_address_sync(hdev, !connectable, adv_use_rpa(hdev, flags), &own_addr_type); if (status) return status; memset(&cp, 0, sizeof(cp)); if (adv_instance) { adv_min_interval = adv_instance->min_interval; adv_max_interval = adv_instance->max_interval; } else { adv_min_interval = hdev->le_adv_min_interval; adv_max_interval = hdev->le_adv_max_interval; } if (connectable) { cp.type = LE_ADV_IND; } else { if (hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance)) cp.type = LE_ADV_SCAN_IND; else cp.type = LE_ADV_NONCONN_IND; if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) || hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; } } cp.min_interval = cpu_to_le16(adv_min_interval); cp.max_interval = cpu_to_le16(adv_max_interval); cp.own_address_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; status = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (status) return status; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static int enable_advertising_sync(struct hci_dev *hdev, void *data) { return hci_enable_advertising_sync(hdev); } int hci_enable_advertising(struct hci_dev *hdev) { if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return 0; return hci_cmd_sync_queue(hdev, enable_advertising_sync, NULL, NULL); } int hci_remove_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { int err; if (!ext_adv_capable(hdev)) return 0; err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0 && !hci_find_adv_instance(hdev, instance)) return -EINVAL; return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance, 0, HCI_CMD_TIMEOUT, sk); } static int remove_ext_adv_sync(struct hci_dev *hdev, void *data) { struct adv_info *adv = data; u8 instance = 0; if (adv) instance = adv->instance; return hci_remove_ext_adv_instance_sync(hdev, instance, NULL); } int hci_remove_ext_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv = NULL; if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; } return hci_cmd_sync_queue(hdev, remove_ext_adv_sync, adv, NULL); } int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) { struct hci_cp_le_term_big cp; memset(&cp, 0, sizeof(cp)); cp.handle = handle; cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_LE_TERM_BIG, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->adv_data_changed) return 0; } len = eir_create_adv_data(hdev, instance, pdu->data); pdu->length = len; pdu->handle = adv ? adv->handle : instance; pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; /* Update data if the command succeed */ if (adv) { adv->adv_data_changed = false; } else { memcpy(hdev->adv_data, pdu->data, len); hdev->adv_data_len = len; } return 0; } static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_adv_data cp; u8 len; memset(&cp, 0, sizeof(cp)); len = eir_create_adv_data(hdev, instance, cp.data); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && memcmp(cp.data, hdev->adv_data, len) == 0) return 0; memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); hdev->adv_data_len = len; cp.length = len; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) { if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; if (ext_adv_capable(hdev)) return hci_set_ext_adv_data_sync(hdev, instance); return hci_set_adv_data_sync(hdev, instance); } int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { struct adv_info *adv = NULL; u16 timeout; if (hci_dev_test_flag(hdev, HCI_ADVERTISING) && !ext_adv_capable(hdev)) return -EPERM; if (hdev->adv_instance_timeout) return -EBUSY; adv = hci_find_adv_instance(hdev, instance); if (!adv) return -ENOENT; /* A zero timeout means unlimited advertising. As long as there is * only one instance, duration should be ignored. We still set a timeout * in case further instances are being added later on. * * If the remaining lifetime of the instance is more than the duration * then the timeout corresponds to the duration, otherwise it will be * reduced to the remaining instance lifetime. */ if (adv->timeout == 0 || adv->duration <= adv->remaining_time) timeout = adv->duration; else timeout = adv->remaining_time; /* The remaining time is being reduced unless the instance is being * advertised without time limit. */ if (adv->timeout) adv->remaining_time = adv->remaining_time - timeout; /* Only use work for scheduling instances with legacy advertising */ if (!ext_adv_capable(hdev)) { hdev->adv_instance_timeout = timeout; queue_delayed_work(hdev->req_workqueue, &hdev->adv_instance_expire, msecs_to_jiffies(timeout * 1000)); } /* If we're just re-scheduling the same instance again then do not * execute any HCI commands. This happens when a single instance is * being advertised. */ if (!force && hdev->cur_adv_instance == instance && hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; hdev->cur_adv_instance = instance; return hci_start_adv_sync(hdev, instance); } static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) { int err; if (!ext_adv_capable(hdev)) return 0; /* Disable instance 0x00 to disable all instances */ err = hci_disable_ext_adv_instance_sync(hdev, 0x00); if (err) return err; return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL, 0, HCI_CMD_TIMEOUT, sk); } static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ err = hci_clear_adv_sets_sync(hdev, sk); if (ext_adv_capable(hdev)) return err; /* This is safe as long as there is no command send while the lock is * held. */ hci_dev_lock(hdev); /* Cleanup non-ext instances */ list_for_each_entry_safe(adv, n, &hdev->adv_instances, list) { u8 instance = adv->instance; int err; if (!(force || adv->timeout)) continue; err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); } hci_dev_unlock(hdev); return 0; } static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { int err = 0; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); if (ext_adv_capable(hdev)) return err; /* This is safe as long as there is no command send while the lock is * held. */ hci_dev_lock(hdev); err = hci_remove_adv_instance(hdev, instance); if (!err) mgmt_advertising_removed(sk, hdev, instance); hci_dev_unlock(hdev); return err; } /* For a single instance: * - force == true: The instance will be removed even when its remaining * lifetime is not zero. * - force == false: the instance will be deactivated but kept stored unless * the remaining lifetime is zero. * * For instance == 0x00: * - force == true: All instances will be removed regardless of their timeout * setting. * - force == false: Only instances that have a timeout will be removed. */ int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk, u8 instance, bool force) { struct adv_info *next = NULL; int err; /* Cancel any timeout concerning the removed instance(s). */ if (!instance || hdev->cur_adv_instance == instance) cancel_adv_timeout(hdev); /* Get the next instance to advertise BEFORE we remove * the current one. This can be the same instance again * if there is only one instance. */ if (hdev->cur_adv_instance == instance) next = hci_get_next_instance(hdev, instance); if (!instance) { err = hci_clear_adv_sync(hdev, sk, force); if (err) return err; } else { struct adv_info *adv = hci_find_adv_instance(hdev, instance); if (force || (adv && adv->timeout && !adv->remaining_time)) { /* Don't advertise a removed instance. */ if (next && next->instance == instance) next = NULL; err = hci_remove_adv_sync(hdev, instance, sk); if (err) return err; } } if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return 0; if (next && !ext_adv_capable(hdev)) hci_schedule_adv_instance_sync(hdev, next->instance, false); return 0; } int hci_read_rssi_sync(struct hci_dev *hdev, __le16 handle) { struct hci_cp_read_rssi cp; cp.handle = handle; return __hci_cmd_sync_status(hdev, HCI_OP_READ_RSSI, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_read_clock_sync(struct hci_dev *hdev, struct hci_cp_read_clock *cp) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CLOCK, sizeof(*cp), cp, HCI_CMD_TIMEOUT); } int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) { struct hci_cp_read_tx_power cp; cp.handle = handle; cp.type = type; return __hci_cmd_sync_status(hdev, HCI_OP_READ_TX_POWER, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) err = hci_disable_ext_adv_instance_sync(hdev, 0x00); if (ext_adv_capable(hdev)) return err; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static int hci_le_set_ext_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup) { struct hci_cp_le_set_ext_scan_enable cp; memset(&cp, 0, sizeof(cp)); cp.enable = val; if (hci_dev_test_flag(hdev, HCI_MESH)) cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; else cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup) { struct hci_cp_le_set_scan_enable cp; if (use_ext_scan(hdev)) return hci_le_set_ext_scan_enable_sync(hdev, val, filter_dup); memset(&cp, 0, sizeof(cp)); cp.enable = val; if (val && hci_dev_test_flag(hdev, HCI_MESH)) cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; else cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_set_addr_resolution_enable_sync(struct hci_dev *hdev, u8 val) { if (!use_ll_privacy(hdev)) return 0; /* If controller is not/already resolving we are done. */ if (val == hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, sizeof(val), &val, HCI_CMD_TIMEOUT); } static int hci_scan_disable_sync(struct hci_dev *hdev) { int err; /* If controller is not scanning we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return 0; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00); if (err) { bt_dev_err(hdev, "Unable to disable scanning: %d", err); return err; } return err; } static bool scan_use_rpa(struct hci_dev *hdev) { return hci_dev_test_flag(hdev, HCI_PRIVACY); } static void hci_start_interleave_scan(struct hci_dev *hdev) { hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; queue_delayed_work(hdev->req_workqueue, &hdev->interleave_scan, 0); } static void cancel_interleave_scan(struct hci_dev *hdev) { bt_dev_dbg(hdev, "cancelling interleave scan"); cancel_delayed_work_sync(&hdev->interleave_scan); hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; } /* Return true if interleave_scan wasn't started until exiting this function, * otherwise, return false */ static bool hci_update_interleaved_scan_sync(struct hci_dev *hdev) { /* Do interleaved scan only if all of the following are true: * - There is at least one ADV monitor * - At least one pending LE connection or one device to be scanned for * - Monitor offloading is not supported * If so, we should alternate between allowlist scan and one without * any filters to save power. */ bool use_interleaving = hci_is_adv_monitoring(hdev) && !(list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports)) && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE; bool is_interleaving = is_interleave_scanning(hdev); if (use_interleaving && !is_interleaving) { hci_start_interleave_scan(hdev); bt_dev_dbg(hdev, "starting interleave scan"); return true; } if (!use_interleaving && is_interleaving) cancel_interleave_scan(hdev); return false; } /* Removes connection to resolve list if needed.*/ static int hci_le_del_resolve_list_sync(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct hci_cp_le_del_from_resolv_list cp; struct bdaddr_list_with_irk *entry; if (!use_ll_privacy(hdev)) return 0; /* Check if the IRK has been programmed */ entry = hci_bdaddr_list_lookup_with_irk(&hdev->le_resolv_list, bdaddr, bdaddr_type); if (!entry) return 0; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); return __hci_cmd_sync_status(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_del_accept_list_sync(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct hci_cp_le_del_from_accept_list cp; int err; /* Check if device is on accept list before removing it */ if (!hci_bdaddr_list_lookup(&hdev->le_accept_list, bdaddr, bdaddr_type)) return 0; cp.bdaddr_type = bdaddr_type; bacpy(&cp.bdaddr, bdaddr); /* Ignore errors when removing from resolving list as that is likely * that the device was never added. */ hci_le_del_resolve_list_sync(hdev, &cp.bdaddr, cp.bdaddr_type); err = __hci_cmd_sync_status(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) { bt_dev_err(hdev, "Unable to remove from allow list: %d", err); return err; } bt_dev_dbg(hdev, "Remove %pMR (0x%x) from allow list", &cp.bdaddr, cp.bdaddr_type); return 0; } struct conn_params { bdaddr_t addr; u8 addr_type; hci_conn_flags_t flags; u8 privacy_mode; }; /* Adds connection to resolve list if needed. * Setting params to NULL programs local hdev->irk */ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, struct conn_params *params) { struct hci_cp_le_add_to_resolv_list cp; struct smp_irk *irk; struct bdaddr_list_with_irk *entry; struct hci_conn_params *p; if (!use_ll_privacy(hdev)) return 0; /* Attempt to program local identity address, type and irk if params is * NULL. */ if (!params) { if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) return 0; hci_copy_identity_address(hdev, &cp.bdaddr, &cp.bdaddr_type); memcpy(cp.peer_irk, hdev->irk, 16); goto done; } irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type); if (!irk) return 0; /* Check if the IK has _not_ been programmed yet. */ entry = hci_bdaddr_list_lookup_with_irk(&hdev->le_resolv_list, &params->addr, params->addr_type); if (entry) return 0; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); memcpy(cp.peer_irk, irk->val, 16); /* Default privacy mode is always Network */ params->privacy_mode = HCI_NETWORK_PRIVACY; rcu_read_lock(); p = hci_pend_le_action_lookup(&hdev->pend_le_conns, &params->addr, params->addr_type); if (!p) p = hci_pend_le_action_lookup(&hdev->pend_le_reports, &params->addr, params->addr_type); if (p) WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY); rcu_read_unlock(); done: if (hci_dev_test_flag(hdev, HCI_PRIVACY)) memcpy(cp.local_irk, hdev->irk, 16); else memset(cp.local_irk, 0, 16); return __hci_cmd_sync_status(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Set Device Privacy Mode. */ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev, struct conn_params *params) { struct hci_cp_le_set_privacy_mode cp; struct smp_irk *irk; /* If device privacy mode has already been set there is nothing to do */ if (params->privacy_mode == HCI_DEVICE_PRIVACY) return 0; /* Check if HCI_CONN_FLAG_DEVICE_PRIVACY has been set as it also * indicates that LL Privacy has been enabled and * HCI_OP_LE_SET_PRIVACY_MODE is supported. */ if (!(params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY)) return 0; irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type); if (!irk) return 0; memset(&cp, 0, sizeof(cp)); cp.bdaddr_type = irk->addr_type; bacpy(&cp.bdaddr, &irk->bdaddr); cp.mode = HCI_DEVICE_PRIVACY; /* Note: params->privacy_mode is not updated since it is a copy */ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Adds connection to allow list if needed, if the device uses RPA (has IRK) * this attempts to program the device in the resolving list as well and * properly set the privacy mode. */ static int hci_le_add_accept_list_sync(struct hci_dev *hdev, struct conn_params *params, u8 *num_entries) { struct hci_cp_le_add_to_accept_list cp; int err; /* During suspend, only wakeable devices can be in acceptlist */ if (hdev->suspended && !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) { hci_le_del_accept_list_sync(hdev, &params->addr, params->addr_type); return 0; } /* Select filter policy to accept all advertising */ if (*num_entries >= hdev->le_accept_list_size) return -ENOSPC; /* Accept list can not be used with RPAs */ if (!use_ll_privacy(hdev) && hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) return -EINVAL; /* Attempt to program the device in the resolving list first to avoid * having to rollback in case it fails since the resolving list is * dynamic it can probably be smaller than the accept list. */ err = hci_le_add_resolve_list_sync(hdev, params); if (err) { bt_dev_err(hdev, "Unable to add to resolve list: %d", err); return err; } /* Set Privacy Mode */ err = hci_le_set_privacy_mode_sync(hdev, params); if (err) { bt_dev_err(hdev, "Unable to set privacy mode: %d", err); return err; } /* Check if already in accept list */ if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr, params->addr_type)) return 0; *num_entries += 1; cp.bdaddr_type = params->addr_type; bacpy(&cp.bdaddr, &params->addr); err = __hci_cmd_sync_status(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) { bt_dev_err(hdev, "Unable to add to allow list: %d", err); /* Rollback the device from the resolving list */ hci_le_del_resolve_list_sync(hdev, &cp.bdaddr, cp.bdaddr_type); return err; } bt_dev_dbg(hdev, "Add %pMR (0x%x) to allow list", &cp.bdaddr, cp.bdaddr_type); return 0; } /* This function disables/pause all advertising instances */ static int hci_pause_advertising_sync(struct hci_dev *hdev) { int err; int old_state; /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; bt_dev_dbg(hdev, "Pausing directed advertising"); /* Stop directed advertising */ old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING); if (old_state) { /* When discoverable timeout triggers, then just make sure * the limited discoverable flag is cleared. Even in the case * of a timeout triggered from general discoverable, it is * safe to unconditionally clear the flag. */ hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hdev->discov_timeout = 0; } bt_dev_dbg(hdev, "Pausing advertising instances"); /* Call to disable any advertisements active on the controller. * This will succeed even if no advertisements are configured. */ err = hci_disable_advertising_sync(hdev); if (err) return err; /* If we are using software rotation, pause the loop */ if (!ext_adv_capable(hdev)) cancel_adv_timeout(hdev); hdev->advertising_paused = true; hdev->advertising_old_state = old_state; return 0; } /* This function enables all user advertising instances */ static int hci_resume_advertising_sync(struct hci_dev *hdev) { struct adv_info *adv, *tmp; int err; /* If advertising has not been paused there is nothing to do. */ if (!hdev->advertising_paused) return 0; /* Resume directed advertising */ hdev->advertising_paused = false; if (hdev->advertising_old_state) { hci_dev_set_flag(hdev, HCI_ADVERTISING); hdev->advertising_old_state = 0; } bt_dev_dbg(hdev, "Resuming advertising instances"); if (ext_adv_capable(hdev)) { /* Call for each tracked instance to be re-enabled */ list_for_each_entry_safe(adv, tmp, &hdev->adv_instances, list) { err = hci_enable_ext_advertising_sync(hdev, adv->instance); if (!err) continue; /* If the instance cannot be resumed remove it */ hci_remove_ext_adv_instance_sync(hdev, adv->instance, NULL); } } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop */ err = hci_schedule_adv_instance_sync(hdev, hdev->cur_adv_instance, true); } hdev->advertising_paused = false; return err; } static int hci_pause_addr_resolution(struct hci_dev *hdev) { int err; if (!use_ll_privacy(hdev)) return 0; if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) return 0; /* Cannot disable addr resolution if scanning is enabled or * when initiating an LE connection. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) || hci_lookup_le_connect(hdev)) { bt_dev_err(hdev, "Command not allowed when scan/LE connect"); return -EPERM; } /* Cannot disable addr resolution if advertising is enabled. */ err = hci_pause_advertising_sync(hdev); if (err) { bt_dev_err(hdev, "Pause advertising failed: %d", err); return err; } err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); if (err) bt_dev_err(hdev, "Unable to disable Address Resolution: %d", err); /* Return if address resolution is disabled and RPA is not used. */ if (!err && scan_use_rpa(hdev)) return 0; hci_resume_advertising_sync(hdev); return err; } struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool extended, struct sock *sk) { u16 opcode = extended ? HCI_OP_READ_LOCAL_OOB_EXT_DATA : HCI_OP_READ_LOCAL_OOB_DATA; return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk); } static struct conn_params *conn_params_copy(struct list_head *list, size_t *n) { struct hci_conn_params *params; struct conn_params *p; size_t i; rcu_read_lock(); i = 0; list_for_each_entry_rcu(params, list, action) ++i; *n = i; rcu_read_unlock(); p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL); if (!p) return NULL; rcu_read_lock(); i = 0; list_for_each_entry_rcu(params, list, action) { /* Racing adds are handled in next scan update */ if (i >= *n) break; /* No hdev->lock, but: addr, addr_type are immutable. * privacy_mode is only written by us or in * hci_cc_le_set_privacy_mode that we wait for. * We should be idempotent so MGMT updating flags * while we are processing is OK. */ bacpy(&p[i].addr, &params->addr); p[i].addr_type = params->addr_type; p[i].flags = READ_ONCE(params->flags); p[i].privacy_mode = READ_ONCE(params->privacy_mode); ++i; } rcu_read_unlock(); *n = i; return p; } /* Clear LE Accept List */ static int hci_le_clear_accept_list_sync(struct hci_dev *hdev) { if (!(hdev->commands[26] & 0x80)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL, HCI_CMD_TIMEOUT); } /* Device must not be scanning when updating the accept list. * * Update is done using the following sequence: * * use_ll_privacy((Disable Advertising) -> Disable Resolving List) -> * Remove Devices From Accept List -> * (has IRK && use_ll_privacy(Remove Devices From Resolving List))-> * Add Devices to Accept List -> * (has IRK && use_ll_privacy(Remove Devices From Resolving List)) -> * use_ll_privacy(Enable Resolving List -> (Enable Advertising)) -> * Enable Scanning * * In case of failure advertising shall be restored to its original state and * return would disable accept list since either accept or resolving list could * not be programmed. * */ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) { struct conn_params *params; struct bdaddr_list *b, *t; u8 num_entries = 0; bool pend_conn, pend_report; u8 filter_policy; size_t i, n; int err; /* Pause advertising if resolving list can be used as controllers * cannot accept resolving list modifications while advertising. */ if (use_ll_privacy(hdev)) { err = hci_pause_advertising_sync(hdev); if (err) { bt_dev_err(hdev, "pause advertising failed: %d", err); return 0x00; } } /* Disable address resolution while reprogramming accept list since * devices that do have an IRK will be programmed in the resolving list * when LL Privacy is enabled. */ err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); if (err) { bt_dev_err(hdev, "Unable to disable LL privacy: %d", err); goto done; } /* Force address filtering if PA Sync is in progress */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { struct hci_cp_le_pa_create_sync *sent; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); if (sent) { struct conn_params pa; memset(&pa, 0, sizeof(pa)); bacpy(&pa.addr, &sent->addr); pa.addr_type = sent->addr_type; /* Clear first since there could be addresses left * behind. */ hci_le_clear_accept_list_sync(hdev); num_entries = 1; err = hci_le_add_accept_list_sync(hdev, &pa, &num_entries); goto done; } } /* Go through the current accept list programmed into the * controller one by one and check if that address is connected or is * still in the list of pending connections or list of devices to * report. If not present in either list, then remove it from * the controller. */ list_for_each_entry_safe(b, t, &hdev->le_accept_list, list) { if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type)) continue; /* Pointers not dereferenced, no locks needed */ pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, &b->bdaddr, b->bdaddr_type); pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, &b->bdaddr, b->bdaddr_type); /* If the device is not likely to connect or report, * remove it from the acceptlist. */ if (!pend_conn && !pend_report) { hci_le_del_accept_list_sync(hdev, &b->bdaddr, b->bdaddr_type); continue; } num_entries++; } /* Since all no longer valid accept list entries have been * removed, walk through the list of pending connections * and ensure that any new device gets programmed into * the controller. * * If the list of the devices is larger than the list of * available accept list entries in the controller, then * just abort and return filer policy value to not use the * accept list. * * The list and params may be mutated while we wait for events, * so make a copy and iterate it. */ params = conn_params_copy(&hdev->pend_le_conns, &n); if (!params) { err = -ENOMEM; goto done; } for (i = 0; i < n; ++i) { err = hci_le_add_accept_list_sync(hdev, &params[i], &num_entries); if (err) { kvfree(params); goto done; } } kvfree(params); /* After adding all new pending connections, walk through * the list of pending reports and also add these to the * accept list if there is still space. Abort if space runs out. */ params = conn_params_copy(&hdev->pend_le_reports, &n); if (!params) { err = -ENOMEM; goto done; } for (i = 0; i < n; ++i) { err = hci_le_add_accept_list_sync(hdev, &params[i], &num_entries); if (err) { kvfree(params); goto done; } } kvfree(params); /* Use the allowlist unless the following conditions are all true: * - We are not currently suspending * - There are 1 or more ADV monitors registered and it's not offloaded * - Interleaved scanning is not currently using the allowlist */ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) err = -EINVAL; done: filter_policy = err ? 0x00 : 0x01; /* Enable address resolution when LL Privacy is enabled. */ err = hci_le_set_addr_resolution_enable_sync(hdev, 0x01); if (err) bt_dev_err(hdev, "Unable to enable LL privacy: %d", err); /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* Select filter policy to use accept list */ return filter_policy; } static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp, u8 type, u16 interval, u16 window) { cp->type = type; cp->interval = cpu_to_le16(interval); cp->window = cpu_to_le16(window); } static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy) { struct hci_cp_le_set_ext_scan_params *cp; struct hci_cp_le_scan_phy_params *phy; u8 data[sizeof(*cp) + sizeof(*phy) * 2]; u8 num_phy = 0x00; cp = (void *)data; phy = (void *)cp->data; memset(data, 0, sizeof(data)); cp->own_addr_type = own_addr_type; cp->filter_policy = filter_policy; /* Check if PA Sync is in progress then select the PHY based on the * hci_conn.iso_qos. */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { struct hci_cp_le_add_to_accept_list *sent; sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST); if (sent) { struct hci_conn *conn; conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK, &sent->bdaddr); if (conn) { struct bt_iso_qos *qos = &conn->iso_qos; if (qos->bcast.in.phy & BT_ISO_PHY_1M || qos->bcast.in.phy & BT_ISO_PHY_2M) { cp->scanning_phys |= LE_SCAN_PHY_1M; hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } if (qos->bcast.in.phy & BT_ISO_PHY_CODED) { cp->scanning_phys |= LE_SCAN_PHY_CODED; hci_le_scan_phy_params(phy, type, interval * 3, window * 3); num_phy++; phy++; } if (num_phy) goto done; } } } if (scan_1m(hdev) || scan_2m(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_1M; hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } if (scan_coded(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_CODED; hci_le_scan_phy_params(phy, type, interval * 3, window * 3); num_phy++; phy++; } done: if (!num_phy) return -EINVAL; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS, sizeof(*cp) + sizeof(*phy) * num_phy, data, HCI_CMD_TIMEOUT); } static int hci_le_set_scan_param_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy) { struct hci_cp_le_set_scan_param cp; if (use_ext_scan(hdev)) return hci_le_set_ext_scan_param_sync(hdev, type, interval, window, own_addr_type, filter_policy); memset(&cp, 0, sizeof(cp)); cp.type = type; cp.interval = cpu_to_le16(interval); cp.window = cpu_to_le16(window); cp.own_address_type = own_addr_type; cp.filter_policy = filter_policy; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_start_scan_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy, u8 filter_dup) { int err; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_le_set_scan_param_sync(hdev, type, interval, window, own_addr_type, filter_policy); if (err) return err; return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE, filter_dup); } static int hci_passive_scan_sync(struct hci_dev *hdev) { u8 own_addr_type; u8 filter_policy; u16 window, interval; u8 filter_dups = LE_SCAN_FILTER_DUP_ENABLE; int err; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); return 0; } err = hci_scan_disable_sync(hdev); if (err) { bt_dev_err(hdev, "disable scanning failed: %d", err); return err; } /* Set require_privacy to false since no SCAN_REQ are send * during passive scanning. Not using an non-resolvable address * here is important so that peer devices using direct * advertising with our address will be correctly reported * by the controller. */ if (hci_update_random_address_sync(hdev, false, scan_use_rpa(hdev), &own_addr_type)) return 0; if (hdev->enable_advmon_interleave_scan && hci_update_interleaved_scan_sync(hdev)) return 0; bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); /* Adding or removing entries from the accept list must * happen before enabling scanning. The controller does * not allow accept list modification while scanning. */ filter_policy = hci_update_accept_list_sync(hdev); /* If suspended and filter_policy set to 0x00 (no acceptlist) then * passive scanning cannot be started since that would require the host * to be woken up to process the reports. */ if (hdev->suspended && !filter_policy) { /* Check if accept list is empty then there is no need to scan * while suspended. */ if (list_empty(&hdev->le_accept_list)) return 0; /* If there are devices is the accept_list that means some * devices could not be programmed which in non-suspended case * means filter_policy needs to be set to 0x00 so the host needs * to filter, but since this is treating suspended case we * can ignore device needing host to filter to allow devices in * the acceptlist to be able to wakeup the system. */ filter_policy = 0x01; } /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support * for handling directed advertising. * * So instead of using filter polices 0x00 (no acceptlist) * and 0x01 (acceptlist enabled) use the new filter policies * 0x02 (no acceptlist) and 0x03 (acceptlist enabled). */ if (hci_dev_test_flag(hdev, HCI_PRIVACY) && (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) filter_policy |= 0x02; if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; /* Disable duplicates filter when scanning for advertisement * monitor for the following reasons. * * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm * controllers ignore RSSI_Sampling_Period when the duplicates * filter is enabled. * * For SW pattern filtering, when we're not doing interleaved * scanning, it is necessary to disable duplicates filter, * otherwise hosts can only receive one advertisement and it's * impossible to know if a peer is still in range. */ filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; } /* Disable all filtering for Mesh */ if (hci_dev_test_flag(hdev, HCI_MESH)) { filter_policy = 0; filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } bt_dev_dbg(hdev, "LE passive scan with acceptlist = %d", filter_policy); return hci_start_scan_sync(hdev, LE_SCAN_PASSIVE, interval, window, own_addr_type, filter_policy, filter_dups); } /* This function controls the passive scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it in the following sequence: * * If there are devices to scan: * * Disable Scanning -> Update Accept List -> * use_ll_privacy((Disable Advertising) -> Disable Resolving List -> * Update Resolving List -> Enable Resolving List -> (Enable Advertising)) -> * Enable Scanning * * Otherwise: * * Disable Scanning */ int hci_update_passive_scan_sync(struct hci_dev *hdev) { int err; if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_AUTO_OFF) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; /* No point in doing scanning if LE support hasn't been enabled */ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; /* If discovery is active don't interfere with it */ if (hdev->discovery.state != DISCOVERY_STOPPED) return 0; /* Reset RSSI and UUID filters when starting background scanning * since these filters are meant for service discovery only. * * The Start Discovery and Start Service Discovery operations * ensure to set proper values for RSSI threshold and UUID * filter list. So it is safe to just reset them here. */ hci_discovery_filter_clear(hdev); bt_dev_dbg(hdev, "ADV monitoring is %s", hci_is_adv_monitoring(hdev) ? "on" : "off"); if (!hci_dev_test_flag(hdev, HCI_MESH) && list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && !hci_is_adv_monitoring(hdev) && !hci_dev_test_flag(hdev, HCI_PA_SYNC)) { /* If there is no pending LE connections or devices * to be scanned for or no ADV monitors, we should stop the * background scanning. */ bt_dev_dbg(hdev, "stopping background scanning"); err = hci_scan_disable_sync(hdev); if (err) bt_dev_err(hdev, "stop background scanning failed: %d", err); } else { /* If there is at least one pending LE connection, we should * keep the background scan running. */ /* If controller is connecting, we should not start scanning * since some controllers are not able to scan and connect at * the same time. */ if (hci_lookup_le_connect(hdev)) return 0; bt_dev_dbg(hdev, "start background scanning"); err = hci_passive_scan_sync(hdev); if (err) bt_dev_err(hdev, "start background scanning failed: %d", err); } return err; } static int update_scan_sync(struct hci_dev *hdev, void *data) { return hci_update_scan_sync(hdev); } int hci_update_scan(struct hci_dev *hdev) { return hci_cmd_sync_queue(hdev, update_scan_sync, NULL, NULL); } static int update_passive_scan_sync(struct hci_dev *hdev, void *data) { return hci_update_passive_scan_sync(hdev); } int hci_update_passive_scan(struct hci_dev *hdev) { /* Only queue if it would have any effect */ if (!test_bit(HCI_UP, &hdev->flags) || test_bit(HCI_INIT, &hdev->flags) || hci_dev_test_flag(hdev, HCI_SETUP) || hci_dev_test_flag(hdev, HCI_CONFIG) || hci_dev_test_flag(hdev, HCI_AUTO_OFF) || hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, NULL); } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) { int err; if (!bredr_sc_enabled(hdev) || lmp_host_sc_capable(hdev)) return 0; err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SC_SUPPORT, sizeof(val), &val, HCI_CMD_TIMEOUT); if (!err) { if (val) { hdev->features[1][0] |= LMP_HOST_SC; hci_dev_set_flag(hdev, HCI_SC_ENABLED); } else { hdev->features[1][0] &= ~LMP_HOST_SC; hci_dev_clear_flag(hdev, HCI_SC_ENABLED); } } return err; } int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode) { int err; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED) || lmp_host_ssp_capable(hdev)) return 0; if (!mode && hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); if (err) return err; return hci_write_sc_support_sync(hdev, 0x01); } int hci_write_le_host_supported_sync(struct hci_dev *hdev, u8 le, u8 simul) { struct hci_cp_write_le_host_supported cp; if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || !lmp_bredr_capable(hdev)) return 0; /* Check first if we already have the right host state * (host features set) */ if (le == lmp_host_le_capable(hdev) && simul == lmp_host_le_br_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); cp.le = le; cp.simul = simul; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_powered_update_adv_sync(struct hci_dev *hdev) { struct adv_info *adv, *tmp; int err; if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) return 0; /* If RPA Resolution has not been enable yet it means the * resolving list is empty and we should attempt to program the * local IRK in order to support using own_addr_type * ADDR_LE_DEV_RANDOM_RESOLVED (0x03). */ if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { hci_le_add_resolve_list_sync(hdev, NULL); hci_le_set_addr_resolution_enable_sync(hdev, 0x01); } /* Make sure the controller has a good default for * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) { err = hci_setup_ext_adv_instance_sync(hdev, 0x00); if (!err) hci_update_scan_rsp_data_sync(hdev, 0x00); } else { err = hci_update_adv_data_sync(hdev, 0x00); if (!err) hci_update_scan_rsp_data_sync(hdev, 0x00); } if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hci_enable_advertising_sync(hdev); } /* Call for each tracked instance to be scheduled */ list_for_each_entry_safe(adv, tmp, &hdev->adv_instances, list) hci_schedule_adv_instance_sync(hdev, adv->instance, true); return 0; } static int hci_write_auth_enable_sync(struct hci_dev *hdev) { u8 link_sec; link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); if (link_sec == test_bit(HCI_AUTH, &hdev->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(link_sec), &link_sec, HCI_CMD_TIMEOUT); } int hci_write_fast_connectable_sync(struct hci_dev *hdev, bool enable) { struct hci_cp_write_page_scan_activity cp; u8 type; int err = 0; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; memset(&cp, 0, sizeof(cp)); if (enable) { type = PAGE_SCAN_TYPE_INTERLACED; /* 160 msec page scan interval */ cp.interval = cpu_to_le16(0x0100); } else { type = hdev->def_page_scan_type; cp.interval = cpu_to_le16(hdev->def_page_scan_int); } cp.window = cpu_to_le16(hdev->def_page_scan_window); if (__cpu_to_le16(hdev->page_scan_interval) != cp.interval || __cpu_to_le16(hdev->page_scan_window) != cp.window) { err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; } if (hdev->page_scan_type != type) err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, sizeof(type), &type, HCI_CMD_TIMEOUT); return err; } static bool disconnected_accept_list_entries(struct hci_dev *hdev) { struct bdaddr_list *b; list_for_each_entry(b, &hdev->accept_list, list) { struct hci_conn *conn; conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); if (!conn) return true; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) return true; } return false; } static int hci_write_scan_enable_sync(struct hci_dev *hdev, u8 val) { return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, sizeof(val), &val, HCI_CMD_TIMEOUT); } int hci_update_scan_sync(struct hci_dev *hdev) { u8 scan; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (!hdev_is_powered(hdev)) return 0; if (mgmt_powering_down(hdev)) return 0; if (hdev->scanning_paused) return 0; if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || disconnected_accept_list_entries(hdev)) scan = SCAN_PAGE; else scan = SCAN_DISABLED; if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) return 0; return hci_write_scan_enable_sync(hdev, scan); } int hci_update_name_sync(struct hci_dev *hdev) { struct hci_cp_write_local_name cp; memset(&cp, 0, sizeof(cp)); memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* This function perform powered update HCI command sequence after the HCI init * sequence which end up resetting all states, the sequence is as follows: * * HCI_SSP_ENABLED(Enable SSP) * HCI_LE_ENABLED(Enable LE) * HCI_LE_ENABLED(use_ll_privacy(Add local IRK to Resolving List) -> * Update adv data) * Enable Authentication * lmp_bredr_capable(Set Fast Connectable -> Set Scan Type -> Set Class -> * Set Name -> Set EIR) * HCI_FORCE_STATIC_ADDR | BDADDR_ANY && !HCI_BREDR_ENABLED (Set Static Address) */ int hci_powered_update_sync(struct hci_dev *hdev) { int err; /* Register the available SMP channels (BR/EDR and LE) only when * successfully powering on the controller. This late * registration is required so that LE SMP can clearly decide if * the public address or static address is used. */ smp_register(hdev); err = hci_write_ssp_mode_sync(hdev, 0x01); if (err) return err; err = hci_write_le_host_supported_sync(hdev, 0x01, 0x00); if (err) return err; err = hci_powered_update_adv_sync(hdev); if (err) return err; err = hci_write_auth_enable_sync(hdev); if (err) return err; if (lmp_bredr_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) hci_write_fast_connectable_sync(hdev, true); else hci_write_fast_connectable_sync(hdev, false); hci_update_scan_sync(hdev); hci_update_class_sync(hdev); hci_update_name_sync(hdev); hci_update_eir_sync(hdev); } /* If forcing static address is in use or there is no public * address use the static address as random address (but skip * the HCI command if the current random address is already the * static one. * * In case BR/EDR has been disabled on a dual-mode controller * and a static address has been configured, then use that * address instead of the public BR/EDR address. */ if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))) { if (bacmp(&hdev->static_addr, BDADDR_ANY)) return hci_set_random_addr_sync(hdev, &hdev->static_addr); } return 0; } /** * hci_dev_get_bd_addr_from_property - Get the Bluetooth Device Address * (BD_ADDR) for a HCI device from * a firmware node property. * @hdev: The HCI device * * Search the firmware node for 'local-bd-address'. * * All-zero BD addresses are rejected, because those could be properties * that exist in the firmware tables, but were not updated by the firmware. For * example, the DTS could define 'local-bd-address', with zero BD addresses. */ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev) { struct fwnode_handle *fwnode = dev_fwnode(hdev->dev.parent); bdaddr_t ba; int ret; ret = fwnode_property_read_u8_array(fwnode, "local-bd-address", (u8 *)&ba, sizeof(ba)); if (ret < 0 || !bacmp(&ba, BDADDR_ANY)) return; if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks)) baswap(&hdev->public_addr, &ba); else bacpy(&hdev->public_addr, &ba); } struct hci_init_stage { int (*func)(struct hci_dev *hdev); }; /* Run init stage NULL terminated function table */ static int hci_init_stage_sync(struct hci_dev *hdev, const struct hci_init_stage *stage) { size_t i; for (i = 0; stage[i].func; i++) { int err; err = stage[i].func(hdev); if (err) return err; } return 0; } /* Read Local Version */ static int hci_read_local_version_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL, HCI_CMD_TIMEOUT); } /* Read BD Address */ static int hci_read_bd_addr_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_BD_ADDR, 0, NULL, HCI_CMD_TIMEOUT); } #define HCI_INIT(_func) \ { \ .func = _func, \ } static const struct hci_init_stage hci_init0[] = { /* HCI_OP_READ_LOCAL_VERSION */ HCI_INIT(hci_read_local_version_sync), /* HCI_OP_READ_BD_ADDR */ HCI_INIT(hci_read_bd_addr_sync), {} }; int hci_reset_sync(struct hci_dev *hdev) { int err; set_bit(HCI_RESET, &hdev->flags); err = __hci_cmd_sync_status(hdev, HCI_OP_RESET, 0, NULL, HCI_CMD_TIMEOUT); if (err) return err; return 0; } static int hci_init0_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { err = hci_reset_sync(hdev); if (err) return err; } return hci_init_stage_sync(hdev, hci_init0); } static int hci_unconf_init_sync(struct hci_dev *hdev) { int err; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return 0; err = hci_init0_sync(hdev); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); return 0; } /* Read Local Supported Features. */ static int hci_read_local_features_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } /* BR Controller init stage 1 command sequence */ static const struct hci_init_stage br_init1[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), /* HCI_OP_READ_LOCAL_VERSION */ HCI_INIT(hci_read_local_version_sync), /* HCI_OP_READ_BD_ADDR */ HCI_INIT(hci_read_bd_addr_sync), {} }; /* Read Local Commands */ static int hci_read_local_cmds_sync(struct hci_dev *hdev) { /* All Bluetooth 1.2 and later controllers should support the * HCI command for reading the local supported commands. * * Unfortunately some controllers indicate Bluetooth 1.2 support, * but do not have support for this command. If that is the case, * the driver can quirk the behavior and skip reading the local * supported commands. */ if (hdev->hci_ver > BLUETOOTH_VER_1_1 && !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks)) return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL, HCI_CMD_TIMEOUT); return 0; } static int hci_init1_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); /* Reset */ if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { err = hci_reset_sync(hdev); if (err) return err; } return hci_init_stage_sync(hdev, br_init1); } /* Read Buffer Size (ACL mtu, max pkt, etc.) */ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Class of Device */ static int hci_read_dev_class_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Local Name */ static int hci_read_local_name_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Voice Setting */ static int hci_read_voice_setting_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Number of Supported IAC */ static int hci_read_num_supported_iac_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_NUM_SUPPORTED_IAC, 0, NULL, HCI_CMD_TIMEOUT); } /* Read Current IAC LAP */ static int hci_read_current_iac_lap_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_READ_CURRENT_IAC_LAP, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type, u8 cond_type, bdaddr_t *bdaddr, u8 auto_accept) { struct hci_cp_set_event_filter cp; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); cp.flt_type = flt_type; if (flt_type != HCI_FLT_CLEAR_ALL) { cp.cond_type = cond_type; bacpy(&cp.addr_conn_flt.bdaddr, bdaddr); cp.addr_conn_flt.auto_accept = auto_accept; } return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_FLT, flt_type == HCI_FLT_CLEAR_ALL ? sizeof(cp.flt_type) : sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_clear_event_filter_sync(struct hci_dev *hdev) { if (!hci_dev_test_flag(hdev, HCI_EVENT_FILTER_CONFIGURED)) return 0; /* In theory the state machine should not reach here unless * a hci_set_event_filter_sync() call succeeds, but we do * the check both for parity and as a future reminder. */ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00, BDADDR_ANY, 0x00); } /* Connection accept timeout ~20 secs */ static int hci_write_ca_timeout_sync(struct hci_dev *hdev) { __le16 param = cpu_to_le16(0x7d00); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CA_TIMEOUT, sizeof(param), &param, HCI_CMD_TIMEOUT); } /* BR Controller init stage 2 command sequence */ static const struct hci_init_stage br_init2[] = { /* HCI_OP_READ_BUFFER_SIZE */ HCI_INIT(hci_read_buffer_size_sync), /* HCI_OP_READ_CLASS_OF_DEV */ HCI_INIT(hci_read_dev_class_sync), /* HCI_OP_READ_LOCAL_NAME */ HCI_INIT(hci_read_local_name_sync), /* HCI_OP_READ_VOICE_SETTING */ HCI_INIT(hci_read_voice_setting_sync), /* HCI_OP_READ_NUM_SUPPORTED_IAC */ HCI_INIT(hci_read_num_supported_iac_sync), /* HCI_OP_READ_CURRENT_IAC_LAP */ HCI_INIT(hci_read_current_iac_lap_sync), /* HCI_OP_SET_EVENT_FLT */ HCI_INIT(hci_clear_event_filter_sync), /* HCI_OP_WRITE_CA_TIMEOUT */ HCI_INIT(hci_write_ca_timeout_sync), {} }; static int hci_write_ssp_mode_1_sync(struct hci_dev *hdev) { u8 mode = 0x01; if (!lmp_ssp_capable(hdev) || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; /* When SSP is available, then the host features page * should also be available as well. However some * controllers list the max_page as 0 as long as SSP * has not been enabled. To achieve proper debugging * output, force the minimum max_page to 1 at least. */ hdev->max_page = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } static int hci_write_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; if (!lmp_ssp_capable(hdev) || hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) return 0; memset(hdev->eir, 0, sizeof(hdev->eir)); memset(&cp, 0, sizeof(cp)); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_write_inquiry_mode_sync(struct hci_dev *hdev) { u8 mode; if (!lmp_inq_rssi_capable(hdev) && !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) return 0; /* If Extended Inquiry Result events are supported, then * they are clearly preferred over Inquiry Result with RSSI * events. */ mode = lmp_ext_inq_capable(hdev) ? 0x02 : 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_INQUIRY_MODE, sizeof(mode), &mode, HCI_CMD_TIMEOUT); } static int hci_read_inq_rsp_tx_power_sync(struct hci_dev *hdev) { if (!lmp_inq_tx_pwr_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_local_ext_features_sync(struct hci_dev *hdev, u8 page) { struct hci_cp_read_local_ext_features cp; if (!lmp_ext_feat_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); cp.page = page; return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_read_local_ext_features_1_sync(struct hci_dev *hdev) { return hci_read_local_ext_features_sync(hdev, 0x01); } /* HCI Controller init stage 2 command sequence */ static const struct hci_init_stage hci_init2[] = { /* HCI_OP_READ_LOCAL_COMMANDS */ HCI_INIT(hci_read_local_cmds_sync), /* HCI_OP_WRITE_SSP_MODE */ HCI_INIT(hci_write_ssp_mode_1_sync), /* HCI_OP_WRITE_EIR */ HCI_INIT(hci_write_eir_sync), /* HCI_OP_WRITE_INQUIRY_MODE */ HCI_INIT(hci_write_inquiry_mode_sync), /* HCI_OP_READ_INQ_RSP_TX_POWER */ HCI_INIT(hci_read_inq_rsp_tx_power_sync), /* HCI_OP_READ_LOCAL_EXT_FEATURES */ HCI_INIT(hci_read_local_ext_features_1_sync), /* HCI_OP_WRITE_AUTH_ENABLE */ HCI_INIT(hci_write_auth_enable_sync), {} }; /* Read LE Buffer Size */ static int hci_le_read_buffer_size_sync(struct hci_dev *hdev) { /* Use Read LE Buffer Size V2 if supported */ if (iso_capable(hdev) && hdev->commands[41] & 0x20) return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE_V2, 0, NULL, HCI_CMD_TIMEOUT); return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Local Supported Features */ static int hci_le_read_local_features_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Supported States */ static int hci_le_read_supported_states_sync(struct hci_dev *hdev) { return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL, HCI_CMD_TIMEOUT); } /* LE Controller init stage 2 command sequence */ static const struct hci_init_stage le_init2[] = { /* HCI_OP_LE_READ_LOCAL_FEATURES */ HCI_INIT(hci_le_read_local_features_sync), /* HCI_OP_LE_READ_BUFFER_SIZE */ HCI_INIT(hci_le_read_buffer_size_sync), /* HCI_OP_LE_READ_SUPPORTED_STATES */ HCI_INIT(hci_le_read_supported_states_sync), {} }; static int hci_init2_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_init_stage_sync(hdev, hci_init2); if (err) return err; if (lmp_bredr_capable(hdev)) { err = hci_init_stage_sync(hdev, br_init2); if (err) return err; } else { hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); } if (lmp_le_capable(hdev)) { err = hci_init_stage_sync(hdev, le_init2); if (err) return err; /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) hci_dev_set_flag(hdev, HCI_LE_ENABLED); } return 0; } static int hci_set_event_mask_sync(struct hci_dev *hdev) { /* The second byte is 0xff instead of 0x9f (two reserved bits * disabled) since a Broadcom 1.2 dongle doesn't respond to the * command otherwise. */ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; /* CSR 1.1 dongles does not accept any bitfield so don't try to set * any event mask for pre 1.2 devices. */ if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ /* Don't set Disconnect Complete and mode change when * suspended as that would wakeup the host when disconnecting * due to suspend. */ if (hdev->suspended) { events[0] &= 0xef; events[2] &= 0xf7; } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); events[1] |= 0x20; /* Command Complete */ events[1] |= 0x40; /* Command Status */ events[1] |= 0x80; /* Hardware Error */ /* If the controller supports the Disconnect command, enable * the corresponding event. In addition enable packet flow * control related events. */ if (hdev->commands[0] & 0x20) { /* Don't set Disconnect Complete when suspended as that * would wakeup the host when disconnecting due to * suspend. */ if (!hdev->suspended) events[0] |= 0x10; /* Disconnection Complete */ events[2] |= 0x04; /* Number of Completed Packets */ events[3] |= 0x02; /* Data Buffer Overflow */ } /* If the controller supports the Read Remote Version * Information command, enable the corresponding event. */ if (hdev->commands[2] & 0x80) events[1] |= 0x08; /* Read Remote Version Information * Complete */ if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { events[0] |= 0x80; /* Encryption Change */ events[5] |= 0x80; /* Encryption Key Refresh Complete */ } } if (lmp_inq_rssi_capable(hdev) || test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) events[4] |= 0x04; /* Read Remote Extended Features Complete */ if (lmp_esco_capable(hdev)) { events[5] |= 0x08; /* Synchronous Connection Complete */ events[5] |= 0x10; /* Synchronous Connection Changed */ } if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ if (lmp_pause_enc_capable(hdev)) events[5] |= 0x80; /* Encryption Key Refresh Complete */ if (lmp_ext_inq_capable(hdev)) events[5] |= 0x40; /* Extended Inquiry Result */ if (lmp_no_flush_capable(hdev)) events[7] |= 0x01; /* Enhanced Flush Complete */ if (lmp_lsto_capable(hdev)) events[6] |= 0x80; /* Link Supervision Timeout Changed */ if (lmp_ssp_capable(hdev)) { events[6] |= 0x01; /* IO Capability Request */ events[6] |= 0x02; /* IO Capability Response */ events[6] |= 0x04; /* User Confirmation Request */ events[6] |= 0x08; /* User Passkey Request */ events[6] |= 0x10; /* Remote OOB Data Request */ events[6] |= 0x20; /* Simple Pairing Complete */ events[7] |= 0x04; /* User Passkey Notification */ events[7] |= 0x08; /* Keypress Notification */ events[7] |= 0x10; /* Remote Host Supported * Features Notification */ } if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events, HCI_CMD_TIMEOUT); } static int hci_read_stored_link_key_sync(struct hci_dev *hdev) { struct hci_cp_read_stored_link_key cp; if (!(hdev->commands[6] & 0x20) || test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, BDADDR_ANY); cp.read_all = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_setup_link_policy_sync(struct hci_dev *hdev) { struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; if (!(hdev->commands[5] & 0x10)) return 0; memset(&cp, 0, sizeof(cp)); if (lmp_rswitch_capable(hdev)) link_policy |= HCI_LP_RSWITCH; if (lmp_hold_capable(hdev)) link_policy |= HCI_LP_HOLD; if (lmp_sniff_capable(hdev)) link_policy |= HCI_LP_SNIFF; if (lmp_park_capable(hdev)) link_policy |= HCI_LP_PARK; cp.policy = cpu_to_le16(link_policy); return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) { if (!(hdev->commands[8] & 0x01)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_read_page_scan_type_sync(struct hci_dev *hdev) { /* Some older Broadcom based Bluetooth 1.2 controllers do not * support the Read Page Scan Type command. Check support for * this command in the bit mask of supported commands. */ if (!(hdev->commands[13] & 0x01)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read features beyond page 1 if available */ static int hci_read_local_ext_features_all_sync(struct hci_dev *hdev) { u8 page; int err; if (!lmp_ext_feat_capable(hdev)) return 0; for (page = 2; page < HCI_MAX_PAGES && page <= hdev->max_page; page++) { err = hci_read_local_ext_features_sync(hdev, page); if (err) return err; } return 0; } /* HCI Controller init stage 3 command sequence */ static const struct hci_init_stage hci_init3[] = { /* HCI_OP_SET_EVENT_MASK */ HCI_INIT(hci_set_event_mask_sync), /* HCI_OP_READ_STORED_LINK_KEY */ HCI_INIT(hci_read_stored_link_key_sync), /* HCI_OP_WRITE_DEF_LINK_POLICY */ HCI_INIT(hci_setup_link_policy_sync), /* HCI_OP_READ_PAGE_SCAN_ACTIVITY */ HCI_INIT(hci_read_page_scan_activity_sync), /* HCI_OP_READ_DEF_ERR_DATA_REPORTING */ HCI_INIT(hci_read_def_err_data_reporting_sync), /* HCI_OP_READ_PAGE_SCAN_TYPE */ HCI_INIT(hci_read_page_scan_type_sync), /* HCI_OP_READ_LOCAL_EXT_FEATURES */ HCI_INIT(hci_read_local_ext_features_all_sync), {} }; static int hci_le_set_event_mask_sync(struct hci_dev *hdev) { u8 events[8]; if (!lmp_le_capable(hdev)) return 0; memset(events, 0, sizeof(events)); if (hdev->le_features[0] & HCI_LE_ENCRYPTION) events[0] |= 0x10; /* LE Long Term Key Request */ /* If controller supports the Connection Parameters Request * Link Layer Procedure, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_CONN_PARAM_REQ_PROC) /* LE Remote Connection Parameter Request */ events[0] |= 0x20; /* If the controller supports the Data Length Extension * feature, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) events[0] |= 0x40; /* LE Data Length Change */ /* If the controller supports LL Privacy feature or LE Extended Adv, * enable the corresponding event. */ if (use_enhanced_conn_complete(hdev)) events[1] |= 0x02; /* LE Enhanced Connection Complete */ /* If the controller supports Extended Scanner Filter * Policies, enable the corresponding event. */ if (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY) events[1] |= 0x04; /* LE Direct Advertising Report */ /* If the controller supports Channel Selection Algorithm #2 * feature, enable the corresponding event. */ if (hdev->le_features[1] & HCI_LE_CHAN_SEL_ALG2) events[2] |= 0x08; /* LE Channel Selection Algorithm */ /* If the controller supports the LE Set Scan Enable command, * enable the corresponding advertising report event. */ if (hdev->commands[26] & 0x08) events[0] |= 0x02; /* LE Advertising Report */ /* If the controller supports the LE Create Connection * command, enable the corresponding event. */ if (hdev->commands[26] & 0x10) events[0] |= 0x01; /* LE Connection Complete */ /* If the controller supports the LE Connection Update * command, enable the corresponding event. */ if (hdev->commands[27] & 0x04) events[0] |= 0x04; /* LE Connection Update Complete */ /* If the controller supports the LE Read Remote Used Features * command, enable the corresponding event. */ if (hdev->commands[27] & 0x20) /* LE Read Remote Used Features Complete */ events[0] |= 0x08; /* If the controller supports the LE Read Local P-256 * Public Key command, enable the corresponding event. */ if (hdev->commands[34] & 0x02) /* LE Read Local P-256 Public Key Complete */ events[0] |= 0x80; /* If the controller supports the LE Generate DHKey * command, enable the corresponding event. */ if (hdev->commands[34] & 0x04) events[1] |= 0x01; /* LE Generate DHKey Complete */ /* If the controller supports the LE Set Default PHY or * LE Set PHY commands, enable the corresponding event. */ if (hdev->commands[35] & (0x20 | 0x40)) events[1] |= 0x08; /* LE PHY Update Complete */ /* If the controller supports LE Set Extended Scan Parameters * and LE Set Extended Scan Enable commands, enable the * corresponding event. */ if (use_ext_scan(hdev)) events[1] |= 0x10; /* LE Extended Advertising Report */ /* If the controller supports the LE Extended Advertising * command, enable the corresponding event. */ if (ext_adv_capable(hdev)) events[2] |= 0x02; /* LE Advertising Set Terminated */ if (cis_capable(hdev)) { events[3] |= 0x01; /* LE CIS Established */ if (cis_peripheral_capable(hdev)) events[3] |= 0x02; /* LE CIS Request */ } if (bis_capable(hdev)) { events[1] |= 0x20; /* LE PA Report */ events[1] |= 0x40; /* LE PA Sync Established */ events[3] |= 0x04; /* LE Create BIG Complete */ events[3] |= 0x08; /* LE Terminate BIG Complete */ events[3] |= 0x10; /* LE BIG Sync Established */ events[3] |= 0x20; /* LE BIG Sync Loss */ events[4] |= 0x02; /* LE BIG Info Advertising Report */ } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK, sizeof(events), events, HCI_CMD_TIMEOUT); } /* Read LE Advertising Channel TX Power */ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev) { if ((hdev->commands[25] & 0x40) && !ext_adv_capable(hdev)) { /* HCI TS spec forbids mixing of legacy and extended * advertising commands wherein READ_ADV_TX_POWER is * also included. So do not call it if extended adv * is supported otherwise controller will return * COMMAND_DISALLOWED for extended commands. */ return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL, HCI_CMD_TIMEOUT); } return 0; } /* Read LE Min/Max Tx Power*/ static int hci_le_read_tx_power_sync(struct hci_dev *hdev) { if (!(hdev->commands[38] & 0x80) || test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Accept List Size */ static int hci_le_read_accept_list_size_sync(struct hci_dev *hdev) { if (!(hdev->commands[26] & 0x40)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ACCEPT_LIST_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Resolving List Size */ static int hci_le_read_resolv_list_size_sync(struct hci_dev *hdev) { if (!(hdev->commands[34] & 0x40)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_RESOLV_LIST_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } /* Clear LE Resolving List */ static int hci_le_clear_resolv_list_sync(struct hci_dev *hdev) { if (!(hdev->commands[34] & 0x20)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL, HCI_CMD_TIMEOUT); } /* Set RPA timeout */ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev) { __le16 timeout = cpu_to_le16(hdev->rpa_timeout); if (!(hdev->commands[35] & 0x04) || test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT, sizeof(timeout), &timeout, HCI_CMD_TIMEOUT); } /* Read LE Maximum Data Length */ static int hci_le_read_max_data_len_sync(struct hci_dev *hdev) { if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Suggested Default Data Length */ static int hci_le_read_def_data_len_sync(struct hci_dev *hdev) { if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL, HCI_CMD_TIMEOUT); } /* Read LE Number of Supported Advertising Sets */ static int hci_le_read_num_support_adv_sets_sync(struct hci_dev *hdev) { if (!ext_adv_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, 0, NULL, HCI_CMD_TIMEOUT); } /* Write LE Host Supported */ static int hci_set_le_support_sync(struct hci_dev *hdev) { struct hci_cp_write_le_host_supported cp; /* LE-only devices do not support explicit enablement */ if (!lmp_bredr_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { cp.le = 0x01; cp.simul = 0x00; } if (cp.le == lmp_host_le_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* LE Set Host Feature */ static int hci_le_set_host_feature_sync(struct hci_dev *hdev) { struct hci_cp_le_set_host_feature cp; if (!cis_capable(hdev)) return 0; memset(&cp, 0, sizeof(cp)); /* Connected Isochronous Channels (Host Support) */ cp.bit_number = 32; cp.bit_value = 1; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_HOST_FEATURE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* LE Controller init stage 3 command sequence */ static const struct hci_init_stage le_init3[] = { /* HCI_OP_LE_SET_EVENT_MASK */ HCI_INIT(hci_le_set_event_mask_sync), /* HCI_OP_LE_READ_ADV_TX_POWER */ HCI_INIT(hci_le_read_adv_tx_power_sync), /* HCI_OP_LE_READ_TRANSMIT_POWER */ HCI_INIT(hci_le_read_tx_power_sync), /* HCI_OP_LE_READ_ACCEPT_LIST_SIZE */ HCI_INIT(hci_le_read_accept_list_size_sync), /* HCI_OP_LE_CLEAR_ACCEPT_LIST */ HCI_INIT(hci_le_clear_accept_list_sync), /* HCI_OP_LE_READ_RESOLV_LIST_SIZE */ HCI_INIT(hci_le_read_resolv_list_size_sync), /* HCI_OP_LE_CLEAR_RESOLV_LIST */ HCI_INIT(hci_le_clear_resolv_list_sync), /* HCI_OP_LE_SET_RPA_TIMEOUT */ HCI_INIT(hci_le_set_rpa_timeout_sync), /* HCI_OP_LE_READ_MAX_DATA_LEN */ HCI_INIT(hci_le_read_max_data_len_sync), /* HCI_OP_LE_READ_DEF_DATA_LEN */ HCI_INIT(hci_le_read_def_data_len_sync), /* HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS */ HCI_INIT(hci_le_read_num_support_adv_sets_sync), /* HCI_OP_WRITE_LE_HOST_SUPPORTED */ HCI_INIT(hci_set_le_support_sync), /* HCI_OP_LE_SET_HOST_FEATURE */ HCI_INIT(hci_le_set_host_feature_sync), {} }; static int hci_init3_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_init_stage_sync(hdev, hci_init3); if (err) return err; if (lmp_le_capable(hdev)) return hci_init_stage_sync(hdev, le_init3); return 0; } static int hci_delete_stored_link_key_sync(struct hci_dev *hdev) { struct hci_cp_delete_stored_link_key cp; /* Some Broadcom based Bluetooth controllers do not support the * Delete Stored Link Key command. They are clearly indicating its * absence in the bit mask of supported commands. * * Check the supported commands and only if the command is marked * as supported send it. If not supported assume that the controller * does not have actual support for stored link keys which makes this * command redundant anyway. * * Some controllers indicate that they support handling deleting * stored link keys, but they don't. The quirk lets a driver * just disable this command. */ if (!(hdev->commands[6] & 0x80) || test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) return 0; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, BDADDR_ANY); cp.delete_all = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) { u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; bool changed = false; /* Set event mask page 2 if the HCI command for it is supported */ if (!(hdev->commands[22] & 0x04)) return 0; /* If Connectionless Peripheral Broadcast central role is supported * enable all necessary events for it. */ if (lmp_cpb_central_capable(hdev)) { events[1] |= 0x40; /* Triggered Clock Capture */ events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x08; /* Truncated Page Complete */ events[2] |= 0x20; /* CPB Channel Map Change */ changed = true; } /* If Connectionless Peripheral Broadcast peripheral role is supported * enable all necessary events for it. */ if (lmp_cpb_peripheral_capable(hdev)) { events[2] |= 0x01; /* Synchronization Train Received */ events[2] |= 0x02; /* CPB Receive */ events[2] |= 0x04; /* CPB Timeout */ events[2] |= 0x10; /* Peripheral Page Response Timeout */ changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; changed = true; } /* Some Broadcom based controllers indicate support for Set Event * Mask Page 2 command, but then actually do not support it. Since * the default value is all bits set to zero, the command is only * required if the event mask has to be changed. In case no change * to the event mask is needed, skip this command. */ if (!changed) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events, HCI_CMD_TIMEOUT); } /* Read local codec list if the HCI command is supported */ static int hci_read_local_codecs_sync(struct hci_dev *hdev) { if (hdev->commands[45] & 0x04) hci_read_supported_codecs_v2(hdev); else if (hdev->commands[29] & 0x20) hci_read_supported_codecs(hdev); return 0; } /* Read local pairing options if the HCI command is supported */ static int hci_read_local_pairing_opts_sync(struct hci_dev *hdev) { if (!(hdev->commands[41] & 0x08)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL, HCI_CMD_TIMEOUT); } /* Get MWS transport configuration if the HCI command is supported */ static int hci_get_mws_transport_config_sync(struct hci_dev *hdev) { if (!mws_transport_config_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL, HCI_CMD_TIMEOUT); } /* Check for Synchronization Train support */ static int hci_read_sync_train_params_sync(struct hci_dev *hdev) { if (!lmp_sync_train_capable(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL, HCI_CMD_TIMEOUT); } /* Enable Secure Connections if supported and configured */ static int hci_write_sc_support_1_sync(struct hci_dev *hdev) { u8 support = 0x01; if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED) || !bredr_sc_enabled(hdev)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support, HCI_CMD_TIMEOUT); } /* Set erroneous data reporting if supported to the wideband speech * setting value */ static int hci_set_err_data_report_sync(struct hci_dev *hdev) { struct hci_cp_write_def_err_data_reporting cp; bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; if (enabled == hdev->err_data_reporting) return 0; memset(&cp, 0, sizeof(cp)); cp.err_data_reporting = enabled ? ERR_DATA_REPORTING_ENABLED : ERR_DATA_REPORTING_DISABLED; return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_ERR_DATA_REPORTING, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static const struct hci_init_stage hci_init4[] = { /* HCI_OP_DELETE_STORED_LINK_KEY */ HCI_INIT(hci_delete_stored_link_key_sync), /* HCI_OP_SET_EVENT_MASK_PAGE_2 */ HCI_INIT(hci_set_event_mask_page_2_sync), /* HCI_OP_READ_LOCAL_CODECS */ HCI_INIT(hci_read_local_codecs_sync), /* HCI_OP_READ_LOCAL_PAIRING_OPTS */ HCI_INIT(hci_read_local_pairing_opts_sync), /* HCI_OP_GET_MWS_TRANSPORT_CONFIG */ HCI_INIT(hci_get_mws_transport_config_sync), /* HCI_OP_READ_SYNC_TRAIN_PARAMS */ HCI_INIT(hci_read_sync_train_params_sync), /* HCI_OP_WRITE_SC_SUPPORT */ HCI_INIT(hci_write_sc_support_1_sync), /* HCI_OP_WRITE_DEF_ERR_DATA_REPORTING */ HCI_INIT(hci_set_err_data_report_sync), {} }; /* Set Suggested Default Data Length to maximum if supported */ static int hci_le_set_write_def_data_len_sync(struct hci_dev *hdev) { struct hci_cp_le_write_def_data_len cp; if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)) return 0; memset(&cp, 0, sizeof(cp)); cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); return __hci_cmd_sync_status(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } /* Set Default PHY parameters if command is supported, enables all supported * PHYs according to the LE Features bits. */ static int hci_le_set_default_phy_sync(struct hci_dev *hdev) { struct hci_cp_le_set_default_phy cp; if (!(hdev->commands[35] & 0x20)) { /* If the command is not supported it means only 1M PHY is * supported. */ hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; return 0; } memset(&cp, 0, sizeof(cp)); cp.all_phys = 0x00; cp.tx_phys = HCI_LE_SET_PHY_1M; cp.rx_phys = HCI_LE_SET_PHY_1M; /* Enables 2M PHY if supported */ if (le_2m_capable(hdev)) { cp.tx_phys |= HCI_LE_SET_PHY_2M; cp.rx_phys |= HCI_LE_SET_PHY_2M; } /* Enables Coded PHY if supported */ if (le_coded_capable(hdev)) { cp.tx_phys |= HCI_LE_SET_PHY_CODED; cp.rx_phys |= HCI_LE_SET_PHY_CODED; } return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static const struct hci_init_stage le_init4[] = { /* HCI_OP_LE_WRITE_DEF_DATA_LEN */ HCI_INIT(hci_le_set_write_def_data_len_sync), /* HCI_OP_LE_SET_DEFAULT_PHY */ HCI_INIT(hci_le_set_default_phy_sync), {} }; static int hci_init4_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_init_stage_sync(hdev, hci_init4); if (err) return err; if (lmp_le_capable(hdev)) return hci_init_stage_sync(hdev, le_init4); return 0; } static int hci_init_sync(struct hci_dev *hdev) { int err; err = hci_init1_sync(hdev); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); err = hci_init2_sync(hdev); if (err < 0) return err; err = hci_init3_sync(hdev); if (err < 0) return err; err = hci_init4_sync(hdev); if (err < 0) return err; /* This function is only called when the controller is actually in * configured state. When the controller is marked as unconfigured, * this initialization procedure is not run. * * It means that it is possible that a controller runs through its * setup phase and then discovers missing settings. If that is the * case, then this function will not be called. It then will only * be called during the config phase. * * So only when in setup phase or config phase, create the debugfs * entries and register the SMP channels. */ if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED)) return 0; hci_debugfs_create_common(hdev); if (lmp_bredr_capable(hdev)) hci_debugfs_create_bredr(hdev); if (lmp_le_capable(hdev)) hci_debugfs_create_le(hdev); return 0; } #define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc } static const struct { unsigned long quirk; const char *desc; } hci_broken_table[] = { HCI_QUIRK_BROKEN(LOCAL_COMMANDS, "HCI Read Local Supported Commands not supported"), HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, "HCI Read Default Erroneous Data Reporting command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL, "HCI Set Event Filter command not supported."), HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN, "HCI Enhanced Setup Synchronous Connection command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT, "HCI LE Set Random Private Address Timeout command is " "advertised, but not supported."), HCI_QUIRK_BROKEN(LE_CODED, "HCI LE Coded PHY feature bit is set, " "but its usage is not supported.") }; /* This function handles hdev setup stage: * * Calls hdev->setup * Setup address if HCI_QUIRK_USE_BDADDR_PROPERTY is set. */ static int hci_dev_setup_sync(struct hci_dev *hdev) { int ret = 0; bool invalid_bdaddr; size_t i; if (!hci_dev_test_flag(hdev, HCI_SETUP) && !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) return 0; bt_dev_dbg(hdev, ""); hci_sock_dev_event(hdev, HCI_DEV_SETUP); if (hdev->setup) ret = hdev->setup(hdev); for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) { if (test_bit(hci_broken_table[i].quirk, &hdev->quirks)) bt_dev_warn(hdev, "%s", hci_broken_table[i].desc); } /* The transport driver can set the quirk to mark the * BD_ADDR invalid before creating the HCI device or in * its setup callback. */ invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) || test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); if (!ret) { if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) && !bacmp(&hdev->public_addr, BDADDR_ANY)) hci_dev_get_bd_addr_from_property(hdev); if (invalid_bdaddr && bacmp(&hdev->public_addr, BDADDR_ANY) && hdev->set_bdaddr) { ret = hdev->set_bdaddr(hdev, &hdev->public_addr); if (!ret) invalid_bdaddr = false; } } /* The transport driver can set these quirks before * creating the HCI device or in its setup callback. * * For the invalid BD_ADDR quirk it is possible that * it becomes a valid address if the bootloader does * provide it (see above). * * In case any of them is set, the controller has to * start up as unconfigured. */ if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || invalid_bdaddr) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* For an unconfigured controller it is required to * read at least the version information provided by * the Read Local Version Information command. * * If the set_bdaddr driver callback is provided, then * also the original Bluetooth public device address * will be read using the Read BD Address command. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return hci_unconf_init_sync(hdev); return ret; } /* This function handles hdev init stage: * * Calls hci_dev_setup_sync to perform setup stage * Calls hci_init_sync to perform HCI command init sequence */ static int hci_dev_init_sync(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); ret = hci_dev_setup_sync(hdev); if (hci_dev_test_flag(hdev, HCI_CONFIG)) { /* If public address change is configured, ensure that * the address gets programmed. If the driver does not * support changing the public address, fail the power * on procedure. */ if (bacmp(&hdev->public_addr, BDADDR_ANY) && hdev->set_bdaddr) ret = hdev->set_bdaddr(hdev, &hdev->public_addr); else ret = -EADDRNOTAVAIL; } if (!ret) { if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = hci_init_sync(hdev); if (!ret && hdev->post_init) ret = hdev->post_init(hdev); } } /* If the HCI Reset command is clearing all diagnostic settings, * then they need to be reprogrammed after the init procedure * completed. */ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) ret = hdev->set_diag(hdev, true); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { msft_do_open(hdev); aosp_do_open(hdev); } clear_bit(HCI_INIT, &hdev->flags); return ret; } int hci_dev_open_sync(struct hci_dev *hdev) { int ret; bt_dev_dbg(hdev, ""); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; goto done; } if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG)) { /* Check for rfkill but allow the HCI setup stage to * proceed (which in itself doesn't cause any RF activity). */ if (hci_dev_test_flag(hdev, HCI_RFKILLED)) { ret = -ERFKILL; goto done; } /* Check for valid public address or a configured static * random address, but let the HCI setup proceed to * be able to determine if there is a public address * or not. * * In case of user channel usage, it is not important * if a public address or static random address is * available. */ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { ret = -EADDRNOTAVAIL; goto done; } } if (test_bit(HCI_UP, &hdev->flags)) { ret = -EALREADY; goto done; } if (hdev->open(hdev)) { ret = -EIO; goto done; } hci_devcd_reset(hdev); set_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_OPEN); ret = hci_dev_init_sync(hdev); if (!ret) { hci_dev_hold(hdev); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); hci_adv_instances_set_rpa_expired(hdev, true); set_bit(HCI_UP, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_UP); hci_leds_update_powered(hdev, true); if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) { ret = hci_powered_update_sync(hdev); mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ flush_work(&hdev->tx_work); /* Since hci_rx_work() is possible to awake new cmd_work * it should be flushed first to avoid unexpected call of * hci_cmd_work() */ flush_work(&hdev->rx_work); flush_work(&hdev->cmd_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); if (hdev->flush) hdev->flush(hdev); if (hdev->sent_cmd) { cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } if (hdev->req_skb) { kfree_skb(hdev->req_skb); hdev->req_skb = NULL; } clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); hdev->close(hdev); hdev->flags &= BIT(HCI_RAW); } done: return ret; } /* This function requires the caller holds hdev->lock */ static void hci_pend_le_actions_clear(struct hci_dev *hdev) { struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { hci_pend_le_list_del_init(p); if (p->conn) { hci_conn_drop(p->conn); hci_conn_put(p->conn); p->conn = NULL; } } BT_DBG("All LE pending actions cleared"); } static int hci_dev_shutdown(struct hci_dev *hdev) { int err = 0; /* Similar to how we first do setup and then set the exclusive access * bit for userspace, we must first unset userchannel and then clean up. * Otherwise, the kernel can't properly use the hci channel to clean up * the controller (some shutdown routines require sending additional * commands to the controller for example). */ bool was_userchannel = hci_dev_test_and_clear_flag(hdev, HCI_USER_CHANNEL); if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && test_bit(HCI_UP, &hdev->flags)) { /* Execute vendor specific shutdown routine */ if (hdev->shutdown) err = hdev->shutdown(hdev); } if (was_userchannel) hci_dev_set_flag(hdev, HCI_USER_CHANNEL); return err; } int hci_dev_close_sync(struct hci_dev *hdev) { bool auto_off; int err = 0; bt_dev_dbg(hdev, ""); cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); if (hdev->adv_instance_timeout) { cancel_delayed_work_sync(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } err = hci_dev_shutdown(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); return err; } hci_leds_update_powered(hdev, false); /* Flush RX and TX works */ flush_work(&hdev->tx_work); flush_work(&hdev->rx_work); if (hdev->discov_timeout > 0) { hdev->discov_timeout = 0; hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); } if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); if (hci_dev_test_flag(hdev, HCI_MGMT)) { struct adv_info *adv_instance; cancel_delayed_work_sync(&hdev->rpa_expired); list_for_each_entry(adv_instance, &hdev->adv_instances, list) cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); } /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ drain_workqueue(hdev->workqueue); hci_dev_lock(hdev); hci_discovery_set_state(hdev, DISCOVERY_STOPPED); auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); hci_conn_hash_flush(hdev); /* Prevent data races on hdev->smp_data or hdev->smp_bredr_data */ smp_unregister(hdev); hci_dev_unlock(hdev); hci_sock_dev_event(hdev, HCI_DEV_DOWN); if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { aosp_do_close(hdev); msft_do_close(hdev); } if (hdev->flush) hdev->flush(hdev); /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); hci_reset_sync(hdev); clear_bit(HCI_INIT, &hdev->flags); } /* flush cmd work */ flush_work(&hdev->cmd_work); /* Drop queues */ skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->raw_q); /* Drop last sent command */ if (hdev->sent_cmd) { cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } /* Drop last request */ if (hdev->req_skb) { kfree_skb(hdev->req_skb); hdev->req_skb = NULL; } clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); /* After this point our queues are empty and no tasks are scheduled. */ hdev->close(hdev); /* Clear flags */ hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); hci_codec_list_clear(&hdev->local_codecs); hci_dev_put(hdev); return err; } /* This function perform power on HCI command sequence as follows: * * If controller is already up (HCI_UP) performs hci_powered_update_sync * sequence otherwise run hci_dev_open_sync which will follow with * hci_powered_update_sync after the init sequence is completed. */ static int hci_power_on_sync(struct hci_dev *hdev) { int err; if (test_bit(HCI_UP, &hdev->flags) && hci_dev_test_flag(hdev, HCI_MGMT) && hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { cancel_delayed_work(&hdev->power_off); return hci_powered_update_sync(hdev); } err = hci_dev_open_sync(hdev); if (err < 0) return err; /* During the HCI setup phase, a few error conditions are * ignored and they need to be checked now. If they are still * valid, it is important to return the device back off. */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_close_sync(hdev); } else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); } if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) { /* For unconfigured devices, set the HCI_RAW flag * so that userspace can easily identify them. */ if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) set_bit(HCI_RAW, &hdev->flags); /* For fully configured devices, this will send * the Index Added event. For unconfigured devices, * it will send Unconfigued Index Added event. * * Devices with HCI_QUIRK_RAW_DEVICE are ignored * and no event will be send. */ mgmt_index_added(hdev); } else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) { /* When the controller is now configured, then it * is important to clear the HCI_RAW flag. */ if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only * happens with the transition from unconfigured to * configured. This will send the Index Added event. */ mgmt_index_added(hdev); } return 0; } static int hci_remote_name_cancel_sync(struct hci_dev *hdev, bdaddr_t *addr) { struct hci_cp_remote_name_req_cancel cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, addr); return __hci_cmd_sync_status(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_stop_discovery_sync(struct hci_dev *hdev) { struct discovery_state *d = &hdev->discovery; struct inquiry_entry *e; int err; bt_dev_dbg(hdev, "state %u", hdev->discovery.state); if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { if (test_bit(HCI_INQUIRY, &hdev->flags)) { err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); if (err) return err; } if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); err = hci_scan_disable_sync(hdev); if (err) return err; } } else { err = hci_scan_disable_sync(hdev); if (err) return err; } /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* No further actions needed for LE-only discovery */ if (d->type == DISCOV_TYPE_LE) return 0; if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_PENDING); if (!e) return 0; return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr); } return 0; } static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_disconnect cp; if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. */ hci_dev_lock(hdev); hci_conn_failed(conn, reason); hci_dev_unlock(hdev); return 0; } memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; /* Wait for HCI_EV_DISCONN_COMPLETE, not HCI_EV_CMD_STATUS, when the * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is * used when suspending or powering off, where we don't want to wait * for the peer's response. */ if (reason != HCI_ERROR_REMOTE_POWER_OFF) return __hci_cmd_sync_status_sk(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp, HCI_EV_DISCONN_COMPLETE, HCI_CMD_TIMEOUT, NULL); return __hci_cmd_sync_status(hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { /* Return reason if scanning since the connection shall probably be * cleanup directly. */ if (test_bit(HCI_CONN_SCANNING, &conn->flags)) return reason; if (conn->role == HCI_ROLE_SLAVE || test_and_set_bit(HCI_CONN_CANCEL, &conn->flags)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); } static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { if (conn->type == LE_LINK) return hci_le_connect_cancel_sync(hdev, conn, reason); if (conn->type == ISO_LINK) { /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 1857: * * If this command is issued for a CIS on the Central and the * CIS is successfully terminated before being established, * then an HCI_LE_CIS_Established event shall also be sent for * this CIS with the Status Operation Cancelled by Host (0x44). */ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) return hci_disconnect_sync(hdev, conn, reason); /* CIS with no Create CIS sent have nothing to cancel */ if (bacmp(&conn->dst, BDADDR_ANY)) return HCI_ERROR_LOCAL_HOST_TERM; /* There is no way to cancel a BIS without terminating the BIG * which is done later on connection cleanup. */ return 0; } if (hdev->hci_ver < BLUETOOTH_VER_1_2) return 0; /* Wait for HCI_EV_CONN_COMPLETE, not HCI_EV_CMD_STATUS, when the * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is * used when suspending or powering off, where we don't want to wait * for the peer's response. */ if (reason != HCI_ERROR_REMOTE_POWER_OFF) return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN_CANCEL, 6, &conn->dst, HCI_EV_CONN_COMPLETE, HCI_CMD_TIMEOUT, NULL); return __hci_cmd_sync_status(hdev, HCI_OP_CREATE_CONN_CANCEL, 6, &conn->dst, HCI_CMD_TIMEOUT); } static int hci_reject_sco_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_reject_sync_conn_req cp; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.reason = reason; /* SCO rejection has its own limited set of * allowed error values (0x0D-0x0F). */ if (reason < 0x0d || reason > 0x0f) cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; return __hci_cmd_sync_status(hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_reject_cis_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_le_reject_cis cp; memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_reject_conn_req cp; if (conn->type == ISO_LINK) return hci_le_reject_cis_sync(hdev, conn, reason); if (conn->type == SCO_LINK || conn->type == ESCO_LINK) return hci_reject_sco_sync(hdev, conn, reason); memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.reason = reason; return __hci_cmd_sync_status(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err = 0; u16 handle = conn->handle; bool disconnect = false; struct hci_conn *c; switch (conn->state) { case BT_CONNECTED: case BT_CONFIG: err = hci_disconnect_sync(hdev, conn, reason); break; case BT_CONNECT: err = hci_connect_cancel_sync(hdev, conn, reason); break; case BT_CONNECT2: err = hci_reject_conn_sync(hdev, conn, reason); break; case BT_OPEN: case BT_BOUND: break; default: disconnect = true; break; } hci_dev_lock(hdev); /* Check if the connection has been cleaned up concurrently */ c = hci_conn_hash_lookup_handle(hdev, handle); if (!c || c != conn) { err = 0; goto unlock; } /* Cleanup hci_conn object if it cannot be cancelled as it * likelly means the controller and host stack are out of sync * or in case of LE it was still scanning so it can be cleanup * safely. */ if (disconnect) { conn->state = BT_CLOSED; hci_disconn_cfm(conn, reason); hci_conn_del(conn); } else { hci_conn_failed(conn, reason); } unlock: hci_dev_unlock(hdev); return err; } static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason) { struct list_head *head = &hdev->conn_hash.list; struct hci_conn *conn; rcu_read_lock(); while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) { /* Make sure the connection is not freed while unlocking */ conn = hci_conn_get(conn); rcu_read_unlock(); /* Disregard possible errors since hci_conn_del shall have been * called even in case of errors had occurred since it would * then cause hci_conn_failed to be called which calls * hci_conn_del internally. */ hci_abort_conn_sync(hdev, conn, reason); hci_conn_put(conn); rcu_read_lock(); } rcu_read_unlock(); return 0; } /* This function perform power off HCI command sequence as follows: * * Clear Advertising * Stop Discovery * Disconnect all connections * hci_dev_close_sync */ static int hci_power_off_sync(struct hci_dev *hdev) { int err; /* If controller is already down there is nothing to do */ if (!test_bit(HCI_UP, &hdev->flags)) return 0; hci_dev_set_flag(hdev, HCI_POWERING_DOWN); if (test_bit(HCI_ISCAN, &hdev->flags) || test_bit(HCI_PSCAN, &hdev->flags)) { err = hci_write_scan_enable_sync(hdev, 0x00); if (err) goto out; } err = hci_clear_adv_sync(hdev, NULL, false); if (err) goto out; err = hci_stop_discovery_sync(hdev); if (err) goto out; /* Terminated due to Power Off */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); if (err) goto out; err = hci_dev_close_sync(hdev); out: hci_dev_clear_flag(hdev, HCI_POWERING_DOWN); return err; } int hci_set_powered_sync(struct hci_dev *hdev, u8 val) { if (val) return hci_power_on_sync(hdev); return hci_power_off_sync(hdev); } static int hci_write_iac_sync(struct hci_dev *hdev) { struct hci_cp_write_current_iac_lap cp; if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) return 0; memset(&cp, 0, sizeof(cp)); if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { /* Limited discoverable mode */ cp.num_iac = min_t(u8, hdev->num_iac, 2); cp.iac_lap[0] = 0x00; /* LIAC */ cp.iac_lap[1] = 0x8b; cp.iac_lap[2] = 0x9e; cp.iac_lap[3] = 0x33; /* GIAC */ cp.iac_lap[4] = 0x8b; cp.iac_lap[5] = 0x9e; } else { /* General discoverable mode */ cp.num_iac = 1; cp.iac_lap[0] = 0x33; /* GIAC */ cp.iac_lap[1] = 0x8b; cp.iac_lap[2] = 0x9e; } return __hci_cmd_sync_status(hdev, HCI_OP_WRITE_CURRENT_IAC_LAP, (cp.num_iac * 3) + 1, &cp, HCI_CMD_TIMEOUT); } int hci_update_discoverable_sync(struct hci_dev *hdev) { int err = 0; if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = hci_write_iac_sync(hdev); if (err) return err; err = hci_update_scan_sync(hdev); if (err) return err; err = hci_update_class_sync(hdev); if (err) return err; } /* Advertising instances don't use the global discoverable setting, so * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) { err = hci_update_adv_data_sync(hdev, 0x00); if (err) return err; /* Discoverable mode affects the local advertising * address in limited privacy mode. */ if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) { if (ext_adv_capable(hdev)) err = hci_start_ext_adv_sync(hdev, 0x00); else err = hci_enable_advertising_sync(hdev); } } return err; } static int update_discoverable_sync(struct hci_dev *hdev, void *data) { return hci_update_discoverable_sync(hdev); } int hci_update_discoverable(struct hci_dev *hdev) { /* Only queue if it would have any effect */ if (hdev_is_powered(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING) && hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) return hci_cmd_sync_queue(hdev, update_discoverable_sync, NULL, NULL); return 0; } int hci_update_connectable_sync(struct hci_dev *hdev) { int err; err = hci_update_scan_sync(hdev); if (err) return err; /* If BR/EDR is not enabled and we disable advertising as a * by-product of disabling connectable, we need to update the * advertising flags. */ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) err = hci_update_adv_data_sync(hdev, hdev->cur_adv_instance); /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !list_empty(&hdev->adv_instances)) { if (ext_adv_capable(hdev)) err = hci_start_ext_adv_sync(hdev, hdev->cur_adv_instance); else err = hci_enable_advertising_sync(hdev); if (err) return err; } return hci_update_passive_scan_sync(hdev); } int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp) { const u8 giac[3] = { 0x33, 0x8b, 0x9e }; const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; bt_dev_dbg(hdev, ""); if (test_bit(HCI_INQUIRY, &hdev->flags)) return 0; hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); hci_dev_unlock(hdev); memset(&cp, 0, sizeof(cp)); if (hdev->discovery.limited) memcpy(&cp.lap, liac, sizeof(cp.lap)); else memcpy(&cp.lap, giac, sizeof(cp.lap)); cp.length = length; cp.num_rsp = num_rsp; return __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) { u8 own_addr_type; /* Accept list is not used for discovery */ u8 filter_policy = 0x00; /* Default is to enable duplicates filter */ u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; int err; bt_dev_dbg(hdev, ""); /* If controller is scanning, it means the passive scanning is * running. Thus, we should temporarily stop it in order to set the * discovery scanning parameters. */ err = hci_scan_disable_sync(hdev); if (err) { bt_dev_err(hdev, "Unable to disable scanning: %d", err); return err; } cancel_interleave_scan(hdev); /* Pause address resolution for active scan and stop advertising if * privacy is enabled. */ err = hci_pause_addr_resolution(hdev); if (err) goto failed; /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable * private address. */ err = hci_update_random_address_sync(hdev, true, scan_use_rpa(hdev), &own_addr_type); if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; if (hci_is_adv_monitoring(hdev) || (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one * advertisement for one peer(*) during active scanning, and * might report loss to these peers. * * If controller does strict duplicate filtering and the * discovery requires result filtering disables controller based * filtering since that can cause reports that would match the * host filter to not be reported. */ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } err = hci_start_scan_sync(hdev, LE_SCAN_ACTIVE, interval, hdev->le_scan_window_discovery, own_addr_type, filter_policy, filter_dup); if (!err) return err; failed: /* Resume advertising if it was paused */ if (use_ll_privacy(hdev)) hci_resume_advertising_sync(hdev); /* Resume passive scanning */ hci_update_passive_scan_sync(hdev); return err; } static int hci_start_interleaved_discovery_sync(struct hci_dev *hdev) { int err; bt_dev_dbg(hdev, ""); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery * 2); if (err) return err; return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); } int hci_start_discovery_sync(struct hci_dev *hdev) { unsigned long timeout; int err; bt_dev_dbg(hdev, "type %u", hdev->discovery.type); switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); case DISCOV_TYPE_INTERLEAVED: /* When running simultaneous discovery, the LE scanning time * should occupy the whole discovery time sine BR/EDR inquiry * and LE scanning are scheduled by the controller. * * For interleaving discovery in comparison, BR/EDR inquiry * and LE scanning are done sequentially with separate * timeouts. */ if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); /* During simultaneous discovery, we double LE scan * interval. We must leave some time for the controller * to do BR/EDR inquiry. */ err = hci_start_interleaved_discovery_sync(hdev); break; } timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery); break; case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); err = hci_active_scan_sync(hdev, hdev->le_scan_int_discovery); break; default: return -EINVAL; } if (err) return err; bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); return 0; } static void hci_suspend_monitor_sync(struct hci_dev *hdev) { switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_MSFT: msft_suspend_sync(hdev); break; default: return; } } /* This function disables discovery and mark it as paused */ static int hci_pause_discovery_sync(struct hci_dev *hdev) { int old_state = hdev->discovery.state; int err; /* If discovery already stopped/stopping/paused there nothing to do */ if (old_state == DISCOVERY_STOPPED || old_state == DISCOVERY_STOPPING || hdev->discovery_paused) return 0; hci_discovery_set_state(hdev, DISCOVERY_STOPPING); err = hci_stop_discovery_sync(hdev); if (err) return err; hdev->discovery_paused = true; hci_discovery_set_state(hdev, DISCOVERY_STOPPED); return 0; } static int hci_update_event_filter_sync(struct hci_dev *hdev) { struct bdaddr_list_with_flags *b; u8 scan = SCAN_DISABLED; bool scanning = test_bit(HCI_PSCAN, &hdev->flags); int err; if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) return 0; /* Some fake CSR controllers lock up after setting this type of * filter, so avoid sending the request altogether. */ if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks)) return 0; /* Always clear event filter when starting */ hci_clear_event_filter_sync(hdev); list_for_each_entry(b, &hdev->accept_list, list) { if (!(b->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) continue; bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); err = hci_set_event_filter_sync(hdev, HCI_FLT_CONN_SETUP, HCI_CONN_SETUP_ALLOW_BDADDR, &b->bdaddr, HCI_CONN_SETUP_AUTO_ON); if (err) bt_dev_dbg(hdev, "Failed to set event filter for %pMR", &b->bdaddr); else scan = SCAN_PAGE; } if (scan && !scanning) hci_write_scan_enable_sync(hdev, scan); else if (!scan && scanning) hci_write_scan_enable_sync(hdev, scan); return 0; } /* This function disables scan (BR and LE) and mark it as paused */ static int hci_pause_scan_sync(struct hci_dev *hdev) { if (hdev->scanning_paused) return 0; /* Disable page scan if enabled */ if (test_bit(HCI_PSCAN, &hdev->flags)) hci_write_scan_enable_sync(hdev, SCAN_DISABLED); hci_scan_disable_sync(hdev); hdev->scanning_paused = true; return 0; } /* This function performs the HCI suspend procedures in the follow order: * * Pause discovery (active scanning/inquiry) * Pause Directed Advertising/Advertising * Pause Scanning (passive scanning in case discovery was not active) * Disconnect all connections * Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup * otherwise: * Update event mask (only set events that are allowed to wake up the host) * Update event filter (with devices marked with HCI_CONN_FLAG_REMOTE_WAKEUP) * Update passive scanning (lower duty cycle) * Set suspend_status to BT_SUSPEND_CONFIGURE_WAKE */ int hci_suspend_sync(struct hci_dev *hdev) { int err; /* If marked as suspended there nothing to do */ if (hdev->suspended) return 0; /* Mark device as suspended */ hdev->suspended = true; /* Pause discovery if not already stopped */ hci_pause_discovery_sync(hdev); /* Pause other advertisements */ hci_pause_advertising_sync(hdev); /* Suspend monitor filters */ hci_suspend_monitor_sync(hdev); /* Prevent disconnects from causing scanning to be re-enabled */ hci_pause_scan_sync(hdev); if (hci_conn_count(hdev)) { /* Soft disconnect everything (power off) */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); if (err) { /* Set state to BT_RUNNING so resume doesn't notify */ hdev->suspend_state = BT_RUNNING; hci_resume_sync(hdev); return err; } /* Update event mask so only the allowed event can wakeup the * host. */ hci_set_event_mask_sync(hdev); } /* Only configure accept list if disconnect succeeded and wake * isn't being prevented. */ if (!hdev->wakeup || !hdev->wakeup(hdev)) { hdev->suspend_state = BT_SUSPEND_DISCONNECT; return 0; } /* Unpause to take care of updating scanning params */ hdev->scanning_paused = false; /* Enable event filter for paired devices */ hci_update_event_filter_sync(hdev); /* Update LE passive scan if enabled */ hci_update_passive_scan_sync(hdev); /* Pause scan changes again. */ hdev->scanning_paused = true; hdev->suspend_state = BT_SUSPEND_CONFIGURE_WAKE; return 0; } /* This function resumes discovery */ static int hci_resume_discovery_sync(struct hci_dev *hdev) { int err; /* If discovery not paused there nothing to do */ if (!hdev->discovery_paused) return 0; hdev->discovery_paused = false; hci_discovery_set_state(hdev, DISCOVERY_STARTING); err = hci_start_discovery_sync(hdev); hci_discovery_set_state(hdev, err ? DISCOVERY_STOPPED : DISCOVERY_FINDING); return err; } static void hci_resume_monitor_sync(struct hci_dev *hdev) { switch (hci_get_adv_monitor_offload_ext(hdev)) { case HCI_ADV_MONITOR_EXT_MSFT: msft_resume_sync(hdev); break; default: return; } } /* This function resume scan and reset paused flag */ static int hci_resume_scan_sync(struct hci_dev *hdev) { if (!hdev->scanning_paused) return 0; hdev->scanning_paused = false; hci_update_scan_sync(hdev); /* Reset passive scanning to normal */ hci_update_passive_scan_sync(hdev); return 0; } /* This function performs the HCI suspend procedures in the follow order: * * Restore event mask * Clear event filter * Update passive scanning (normal duty cycle) * Resume Directed Advertising/Advertising * Resume discovery (active scanning/inquiry) */ int hci_resume_sync(struct hci_dev *hdev) { /* If not marked as suspended there nothing to do */ if (!hdev->suspended) return 0; hdev->suspended = false; /* Restore event mask */ hci_set_event_mask_sync(hdev); /* Clear any event filters and restore scan state */ hci_clear_event_filter_sync(hdev); /* Resume scanning */ hci_resume_scan_sync(hdev); /* Resume monitor filters */ hci_resume_monitor_sync(hdev); /* Resume other advertisements */ hci_resume_advertising_sync(hdev); /* Resume discovery */ hci_resume_discovery_sync(hdev); return 0; } static bool conn_use_rpa(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; return hci_dev_test_flag(hdev, HCI_PRIVACY); } static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; int err; bdaddr_t random_addr; u8 own_addr_type; err = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (err) return err; /* Set require_privacy to false so that the remote device has a * chance of identifying us. */ err = hci_get_random_address(hdev, false, conn_use_rpa(conn), NULL, &own_addr_type, &random_addr); if (err) return err; memset(&cp, 0, sizeof(cp)); cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_DIRECT_IND); cp.channel_map = hdev->le_adv_channel_map; cp.tx_power = HCI_TX_POWER_INVALID; cp.primary_phy = HCI_ADV_PHY_1M; cp.secondary_phy = HCI_ADV_PHY_1M; cp.handle = 0x00; /* Use instance 0 for directed adv */ cp.own_addr_type = own_addr_type; cp.peer_addr_type = conn->dst_type; bacpy(&cp.peer_addr, &conn->dst); /* As per Core Spec 5.2 Vol 2, PART E, Sec 7.8.53, for * advertising_event_property LE_LEGACY_ADV_DIRECT_IND * does not supports advertising data when the advertising set already * contains some, the controller shall return erroc code 'Invalid * HCI Command Parameters(0x12). * So it is required to remove adv set for handle 0x00. since we use * instance 0 for directed adv. */ err = hci_remove_ext_adv_instance_sync(hdev, cp.handle, NULL); if (err) return err; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (err) return err; /* Check if random address need to be updated */ if (own_addr_type == ADDR_LE_DEV_RANDOM && bacmp(&random_addr, BDADDR_ANY) && bacmp(&random_addr, &hdev->random_addr)) { err = hci_set_adv_set_random_addr_sync(hdev, 0x00, &random_addr); if (err) return err; } return hci_enable_ext_advertising_sync(hdev, 0x00); } static int hci_le_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_adv_param cp; u8 status; u8 own_addr_type; u8 enable; if (ext_adv_capable(hdev)) return hci_le_ext_directed_advertising_sync(hdev, conn); /* Clear the HCI_LE_ADV bit temporarily so that the * hci_update_random_address knows that it's safe to go ahead * and write a new random address. The flag will be set back on * as soon as the SET_ADV_ENABLE HCI command completes. */ hci_dev_clear_flag(hdev, HCI_LE_ADV); /* Set require_privacy to false so that the remote device has a * chance of identifying us. */ status = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (status) return status; memset(&cp, 0, sizeof(cp)); /* Some controllers might reject command if intervals are not * within range for undirected advertising. * BCM20702A0 is known to be affected by this. */ cp.min_interval = cpu_to_le16(0x0020); cp.max_interval = cpu_to_le16(0x0020); cp.type = LE_ADV_DIRECT_IND; cp.own_address_type = own_addr_type; cp.direct_addr_type = conn->dst_type; bacpy(&cp.direct_addr, &conn->dst); cp.channel_map = hdev->le_adv_channel_map; status = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp, HCI_CMD_TIMEOUT); if (status) return status; enable = 0x01; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); } static void set_ext_conn_params(struct hci_conn *conn, struct hci_cp_le_ext_conn_param *p) { struct hci_dev *hdev = conn->hdev; memset(p, 0, sizeof(*p)); p->scan_interval = cpu_to_le16(hdev->le_scan_int_connect); p->scan_window = cpu_to_le16(hdev->le_scan_window_connect); p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); p->conn_latency = cpu_to_le16(conn->le_conn_latency); p->supervision_timeout = cpu_to_le16(conn->le_supv_timeout); p->min_ce_len = cpu_to_le16(0x0000); p->max_ce_len = cpu_to_le16(0x0000); } static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 own_addr_type) { struct hci_cp_le_ext_create_conn *cp; struct hci_cp_le_ext_conn_param *p; u8 data[sizeof(*cp) + sizeof(*p) * 3]; u32 plen; cp = (void *)data; p = (void *)cp->data; memset(cp, 0, sizeof(*cp)); bacpy(&cp->peer_addr, &conn->dst); cp->peer_addr_type = conn->dst_type; cp->own_addr_type = own_addr_type; plen = sizeof(*cp); if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M || conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) { cp->phys |= LE_SCAN_PHY_1M; set_ext_conn_params(conn, p); p++; plen += sizeof(*p); } if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M || conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) { cp->phys |= LE_SCAN_PHY_2M; set_ext_conn_params(conn, p); p++; plen += sizeof(*p); } if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED || conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) { cp->phys |= LE_SCAN_PHY_CODED; set_ext_conn_params(conn, p); plen += sizeof(*p); } return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_EXT_CREATE_CONN, plen, data, HCI_EV_LE_ENHANCED_CONN_COMPLETE, conn->conn_timeout, NULL); } static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_cp_le_create_conn cp; struct hci_conn_params *params; u8 own_addr_type; int err; struct hci_conn *conn = data; if (!hci_conn_valid(hdev, conn)) return -ECANCELED; bt_dev_dbg(hdev, "conn %p", conn); clear_bit(HCI_CONN_SCANNING, &conn->flags); conn->state = BT_CONNECT; /* If requested to connect as peripheral use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { /* If we're active scanning and simultaneous roles is not * enabled simply reject the attempt. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE && !hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) { hci_conn_del(conn); return -EBUSY; } /* Pause advertising while doing directed advertising. */ hci_pause_advertising_sync(hdev); err = hci_le_directed_advertising_sync(hdev, conn); goto done; } /* Disable advertising if simultaneous roles is not in use. */ if (!hci_dev_test_flag(hdev, HCI_LE_SIMULTANEOUS_ROLES)) hci_pause_advertising_sync(hdev); params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); if (params) { conn->le_conn_min_interval = params->conn_min_interval; conn->le_conn_max_interval = params->conn_max_interval; conn->le_conn_latency = params->conn_latency; conn->le_supv_timeout = params->supervision_timeout; } else { conn->le_conn_min_interval = hdev->le_conn_min_interval; conn->le_conn_max_interval = hdev->le_conn_max_interval; conn->le_conn_latency = hdev->le_conn_latency; conn->le_supv_timeout = hdev->le_supv_timeout; } /* If controller is scanning, we stop it since some controllers are * not able to scan and connect at the same time. Also set the * HCI_LE_SCAN_INTERRUPTED flag so that the command complete * handler for scan disabling knows to set the correct discovery * state. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { hci_scan_disable_sync(hdev); hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); } /* Update random address, but set require_privacy to false so * that we never connect with an non-resolvable address. */ err = hci_update_random_address_sync(hdev, false, conn_use_rpa(conn), &own_addr_type); if (err) goto done; if (use_ext_conn(hdev)) { err = hci_le_ext_create_conn_sync(hdev, conn, own_addr_type); goto done; } memset(&cp, 0, sizeof(cp)); cp.scan_interval = cpu_to_le16(hdev->le_scan_int_connect); cp.scan_window = cpu_to_le16(hdev->le_scan_window_connect); bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); cp.conn_latency = cpu_to_le16(conn->le_conn_latency); cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2261: * * If this event is unmasked and the HCI_LE_Connection_Complete event * is unmasked, only the HCI_LE_Enhanced_Connection_Complete event is * sent when a new connection has been created. */ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp, use_enhanced_conn_complete(hdev) ? HCI_EV_LE_ENHANCED_CONN_COMPLETE : HCI_EV_LE_CONN_COMPLETE, conn->conn_timeout, NULL); done: if (err == -ETIMEDOUT) hci_le_connect_cancel_sync(hdev, conn, 0x00); /* Re-enable advertising after the connection attempt is finished. */ hci_resume_advertising_sync(hdev); return err; } int hci_le_create_cis_sync(struct hci_dev *hdev) { DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f); size_t aux_num_cis = 0; struct hci_conn *conn; u8 cig = BT_ISO_QOS_CIG_UNSET; /* The spec allows only one pending LE Create CIS command at a time. If * the command is pending now, don't do anything. We check for pending * connections after each CIS Established event. * * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 2566: * * If the Host issues this command before all the * HCI_LE_CIS_Established events from the previous use of the * command have been generated, the Controller shall return the * error code Command Disallowed (0x0C). * * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 2567: * * When the Controller receives the HCI_LE_Create_CIS command, the * Controller sends the HCI_Command_Status event to the Host. An * HCI_LE_CIS_Established event will be generated for each CIS when it * is established or if it is disconnected or considered lost before * being established; until all the events are generated, the command * remains pending. */ hci_dev_lock(hdev); rcu_read_lock(); /* Wait until previous Create CIS has completed */ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) goto done; } /* Find CIG with all CIS ready */ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { struct hci_conn *link; if (hci_conn_check_create_cis(conn)) continue; cig = conn->iso_qos.ucast.cig; list_for_each_entry_rcu(link, &hdev->conn_hash.list, list) { if (hci_conn_check_create_cis(link) > 0 && link->iso_qos.ucast.cig == cig && link->state != BT_CONNECTED) { cig = BT_ISO_QOS_CIG_UNSET; break; } } if (cig != BT_ISO_QOS_CIG_UNSET) break; } if (cig == BT_ISO_QOS_CIG_UNSET) goto done; list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { struct hci_cis *cis = &cmd->cis[aux_num_cis]; if (hci_conn_check_create_cis(conn) || conn->iso_qos.ucast.cig != cig) continue; set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); aux_num_cis++; if (aux_num_cis >= cmd->num_cis) break; } cmd->num_cis = aux_num_cis; done: rcu_read_unlock(); hci_dev_unlock(hdev); if (!aux_num_cis) return 0; /* Wait for HCI_LE_CIS_Established */ return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, struct_size(cmd, cis, cmd->num_cis), cmd, HCI_EVT_LE_CIS_ESTABLISHED, conn->conn_timeout, NULL); } int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle) { struct hci_cp_le_remove_cig cp; memset(&cp, 0, sizeof(cp)); cp.cig_id = handle; return __hci_cmd_sync_status(hdev, HCI_OP_LE_REMOVE_CIG, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle) { struct hci_cp_le_big_term_sync cp; memset(&cp, 0, sizeof(cp)); cp.handle = handle; return __hci_cmd_sync_status(hdev, HCI_OP_LE_BIG_TERM_SYNC, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle) { struct hci_cp_le_pa_term_sync cp; memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(handle); return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bool use_rpa, struct adv_info *adv_instance, u8 *own_addr_type, bdaddr_t *rand_addr) { int err; bacpy(rand_addr, BDADDR_ANY); /* If privacy is enabled use a resolvable private address. If * current RPA has expired then generate a new one. */ if (use_rpa) { /* If Controller supports LL Privacy use own address type is * 0x03 */ if (use_ll_privacy(hdev)) *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; else *own_addr_type = ADDR_LE_DEV_RANDOM; if (adv_instance) { if (adv_rpa_valid(adv_instance)) return 0; } else { if (rpa_valid(hdev)) return 0; } err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { bt_dev_err(hdev, "failed to generate new RPA"); return err; } bacpy(rand_addr, &hdev->rpa); return 0; } /* In case of required privacy without resolvable private address, * use an non-resolvable private address. This is useful for * non-connectable advertising. */ if (require_privacy) { bdaddr_t nrpa; while (true) { /* The non-resolvable private address is generated * from random six bytes with the two most significant * bits cleared. */ get_random_bytes(&nrpa, 6); nrpa.b[5] &= 0x3f; /* The non-resolvable private address shall not be * equal to the public address. */ if (bacmp(&hdev->bdaddr, &nrpa)) break; } *own_addr_type = ADDR_LE_DEV_RANDOM; bacpy(rand_addr, &nrpa); return 0; } /* No privacy so use a public address. */ *own_addr_type = ADDR_LE_DEV_PUBLIC; return 0; } static int _update_adv_data_sync(struct hci_dev *hdev, void *data) { u8 instance = PTR_UINT(data); return hci_update_adv_data_sync(hdev, instance); } int hci_update_adv_data(struct hci_dev *hdev, u8 instance) { return hci_cmd_sync_queue(hdev, _update_adv_data_sync, UINT_PTR(instance), NULL); } static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_conn *conn = data; struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; if (!hci_conn_valid(hdev, conn)) return -ECANCELED; /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false * without user space's request but it is okay since the MGMT Discovery * APIs do not promise that discovery should be done forever. Instead, * the user space monitors the status of MGMT discovering and it may * request for discovery again when this flag becomes false. */ if (test_bit(HCI_INQUIRY, &hdev->flags)) { err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL, HCI_CMD_TIMEOUT); if (err) bt_dev_warn(hdev, "Failed to cancel inquiry %d", err); } conn->state = BT_CONNECT; conn->out = true; conn->role = HCI_ROLE_MASTER; conn->attempt++; conn->link_policy = hdev->link_policy; memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; ie = hci_inquiry_cache_lookup(hdev, &conn->dst); if (ie) { if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { cp.pscan_rep_mode = ie->data.pscan_rep_mode; cp.pscan_mode = ie->data.pscan_mode; cp.clock_offset = ie->data.clock_offset | cpu_to_le16(0x8000); } memcpy(conn->dev_class, ie->data.dev_class, 3); } cp.pkt_type = cpu_to_le16(conn->pkt_type); if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) cp.role_switch = 0x01; else cp.role_switch = 0x00; return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp, HCI_EV_CONN_COMPLETE, conn->conn_timeout, NULL); } int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, NULL); } static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) { struct hci_conn *conn = data; bt_dev_dbg(hdev, "err %d", err); if (err == -ECANCELED) return; hci_dev_lock(hdev); if (!hci_conn_valid(hdev, conn)) goto done; if (!err) { hci_connect_le_scan_cleanup(conn, 0x00); goto done; } /* Check if connection is still pending */ if (conn != hci_lookup_le_connect(hdev)) goto done; /* Flush to make sure we send create conn cancel command if needed */ flush_delayed_work(&conn->le_conn_timeout); hci_conn_failed(conn, bt_status(err)); done: hci_dev_unlock(hdev); } int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) { return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, create_le_conn_complete); } int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) { if (conn->state != BT_OPEN) return -EINVAL; switch (conn->type) { case ACL_LINK: return !hci_cmd_sync_dequeue_once(hdev, hci_acl_create_conn_sync, conn, NULL); case LE_LINK: return !hci_cmd_sync_dequeue_once(hdev, hci_le_create_conn_sync, conn, create_le_conn_complete); } return -ENOENT; } int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params) { struct hci_cp_le_conn_update cp; memset(&cp, 0, sizeof(cp)); cp.handle = cpu_to_le16(conn->handle); cp.conn_interval_min = cpu_to_le16(params->conn_min_interval); cp.conn_interval_max = cpu_to_le16(params->conn_max_interval); cp.conn_latency = cpu_to_le16(params->conn_latency); cp.supervision_timeout = cpu_to_le16(params->supervision_timeout); cp.min_ce_len = cpu_to_le16(0x0000); cp.max_ce_len = cpu_to_le16(0x0000); return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); }
47244 25372 85 30301 46890 25371 49 8009 24373 8008 97 16454 96 154 10993 3165 2927 10772 2 4826 8517 20301 1188 13756 77 10861 606 77 15695 6616 6701 84 5515 1449 3898 6653 3080 101 11248 8608 433 334 15189 4894 8036 15962 6 234 172 301 577 12467 5916 10721 111 10252 5603 5362 504 774 5582 4344 416 329 23789 195 71 2 23789 71 71 155 22 22 10 149 38128 57 175 6420 1690 1695 61 298 5803 5802 5727 534 534 533 32 32 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 /* SPDX-License-Identifier: GPL-2.0 */ /* * Macros for manipulating and testing page->flags */ #ifndef PAGE_FLAGS_H #define PAGE_FLAGS_H #include <linux/types.h> #include <linux/bug.h> #include <linux/mmdebug.h> #ifndef __GENERATING_BOUNDS_H #include <linux/mm_types.h> #include <generated/bounds.h> #endif /* !__GENERATING_BOUNDS_H */ /* * Various page->flags bits: * * PG_reserved is set for special pages. The "struct page" of such a page * should in general not be touched (e.g. set dirty) except by its owner. * Pages marked as PG_reserved include: * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS, * initrd, HW tables) * - Pages reserved or allocated early during boot (before the page allocator * was initialized). This includes (depending on the architecture) the * initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much * much more. Once (if ever) freed, PG_reserved is cleared and they will * be given to the page allocator. * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) * Some PG_reserved pages will be excluded from the hibernation image. * PG_reserved does in general not hinder anybody from dumping or swapping * and is no longer required for remap_pfn_range(). ioremap might require it. * Consequently, PG_reserved for a page mapped into user space can indicate * the zero page, the vDSO, MMIO pages or device memory. * * The PG_private bitflag is set on pagecache pages if they contain filesystem * specific data (which is normally at page->private). It can be used by * private allocations for its own usage. * * During initiation of disk I/O, PG_locked is set. This bit is set before I/O * and cleared when writeback _starts_ or when read _completes_. PG_writeback * is set before writeback starts and cleared when it finishes. * * PG_locked also pins a page in pagecache, and blocks truncation of the file * while it is held. * * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * * PG_swapbacked is set when a page uses swap as a backing storage. This are * usually PageAnon or shmem pages but please note that even anonymous pages * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! */ /* * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which * extends from the high bits downwards. * * | FIELD | ... | FLAGS | * N-1 ^ 0 * (NR_PAGEFLAGS) * * The fields area is reserved for fields mapping zone, node (for NUMA) and * SPARSEMEM section (for variants of SPARSEMEM that require section ids like * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP). */ enum pageflags { PG_locked, /* Page is locked. Don't touch. */ PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_head, /* Must be in bit 6 */ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, PG_error, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif #ifdef CONFIG_ARCH_USES_PG_ARCH_X PG_arch_2, PG_arch_3, #endif __NR_PAGEFLAGS, PG_readahead = PG_reclaim, /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped * THP), PG_anon_exclusive may be set only for the head page or for * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ PG_anon_exclusive = PG_mappedtodisk, /* Filesystems */ PG_checked = PG_owner_priv_1, /* SwapBacked */ PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes * when those inodes are being locally cached. */ PG_fscache = PG_private_2, /* page backed by cache */ /* XEN */ /* Pinned in Xen as a read-only pagetable page. */ PG_pinned = PG_owner_priv_1, /* Pinned as part of domain save (see xen_mm_pin_all()). */ PG_savepinned = PG_dirty, /* Has a grant mapping of another (foreign) domain's page. */ PG_foreign = PG_owner_priv_1, /* Remapped by swiotlb-xen. */ PG_xen_remapped = PG_owner_priv_1, /* non-lru isolated movable page */ PG_isolated = PG_reclaim, /* Only valid for buddy pages. Used to track pages that are reported */ PG_reported = PG_uptodate, #ifdef CONFIG_MEMORY_HOTPLUG /* For self-hosted memmap pages */ PG_vmemmap_self_hosted = PG_owner_priv_1, #endif /* * Flags only valid for compound pages. Stored in first tail page's * flags word. Cannot use the first 8 flags or any flag marked as * PF_ANY. */ /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) #ifndef __GENERATING_BOUNDS_H #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); /* * Return the real head page struct iff the @page is a fake head page, otherwise * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. */ static __always_inline const struct page *page_fixed_fake_head(const struct page *page) { if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) return page; /* * Only addresses aligned with PAGE_SIZE of struct page may be fake head * struct page. The alignment check aims to avoid access the fields ( * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) * cold cacheline in some cases. */ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && test_bit(PG_head, &page->flags)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed with at least * two contiguous pages. */ unsigned long head = READ_ONCE(page[1].compound_head); if (likely(head & 1)) return (const struct page *)(head - 1); } return page; } #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } #endif static __always_inline int page_is_fake_head(const struct page *page) { return page_fixed_fake_head(page) != page; } static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) return head - 1; return (unsigned long)page_fixed_fake_head(page); } #define compound_head(page) ((typeof(page))_compound_head(page)) /** * page_folio - Converts from page to folio. * @p: The page. * * Every page is part of a folio. This function cannot be called on a * NULL pointer. * * Context: No reference, nor lock is required on @page. If the caller * does not hold a reference, this call may race with a folio split, so * it should re-check the folio still contains this page after gaining * a reference on the folio. * Return: The folio which contains this page. */ #define page_folio(p) (_Generic((p), \ const struct page *: (const struct folio *)_compound_head(p), \ struct page *: (struct folio *)_compound_head(p))) /** * folio_page - Return a page from a folio. * @folio: The folio. * @n: The page number to return. * * @n is relative to the start of the folio. This function does not * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ #define folio_page(folio, n) nth_page(&(folio)->page, n) static __always_inline int PageTail(const struct page *page) { return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(const struct page *page) { return test_bit(PG_head, &page->flags) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM void page_init_poison(struct page *page, size_t size); #else static inline void page_init_poison(struct page *page, size_t size) { } #endif static const unsigned long *const_folio_flags(const struct folio *folio, unsigned n) { const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(PageTail(page), page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(PageTail(page), page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } /* * Page flags policies wrt compound pages * * PF_POISONED_CHECK * check if this struct page poisoned/uninitialized * * PF_ANY: * the page flag is relevant for small, head and tail pages. * * PF_HEAD: * for compound page all operations related to the page flag applied to * head page. * * PF_NO_TAIL: * modifications of the page flag must be done on small or head pages, * checks can be done on tail pages too. * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. * * PF_SECOND: * the page flag is stored in the first tail page. */ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ page; }) #define PF_ANY(page, enforce) PF_POISONED_CHECK(page) #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page)) #define PF_NO_TAIL(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ PF_POISONED_CHECK(compound_head(page)); }) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) #define PF_SECOND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) /* Which page is the flag stored in */ #define FOLIO_PF_ANY 0 #define FOLIO_PF_HEAD 0 #define FOLIO_PF_NO_TAIL 0 #define FOLIO_PF_NO_COMPOUND 0 #define FOLIO_PF_SECOND 1 #define FOLIO_HEAD_PAGE 0 #define FOLIO_SECOND_PAGE 1 /* * Macros to create function definitions for page flags */ #define FOLIO_TEST_FLAG(name, page) \ static __always_inline bool folio_test_##name(const struct folio *folio) \ { return test_bit(PG_##name, const_folio_flags(folio, page)); } #define FOLIO_SET_FLAG(name, page) \ static __always_inline void folio_set_##name(struct folio *folio) \ { set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void folio_clear_##name(struct folio *folio) \ { clear_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_SET_FLAG(name, page) \ static __always_inline void __folio_set_##name(struct folio *folio) \ { __set_bit(PG_##name, folio_flags(folio, page)); } #define __FOLIO_CLEAR_FLAG(name, page) \ static __always_inline void __folio_clear_##name(struct folio *folio) \ { __clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_SET_FLAG(name, page) \ static __always_inline bool folio_test_set_##name(struct folio *folio) \ { return test_and_set_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_TEST_CLEAR_FLAG(name, page) \ static __always_inline bool folio_test_clear_##name(struct folio *folio) \ { return test_and_clear_bit(PG_##name, folio_flags(folio, page)); } #define FOLIO_FLAG(name, page) \ FOLIO_TEST_FLAG(name, page) \ FOLIO_SET_FLAG(name, page) \ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ SETPAGEFLAG(uname, lname, policy) \ CLEARPAGEFLAG(uname, lname, policy) #define __PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ __SETPAGEFLAG(uname, lname, policy) \ __CLEARPAGEFLAG(uname, lname, policy) #define TESTSCFLAG(uname, lname, policy) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) #define FOLIO_TEST_FLAG_FALSE(name) \ static inline bool folio_test_##name(const struct folio *folio) \ { return false; } #define FOLIO_SET_FLAG_NOOP(name) \ static inline void folio_set_##name(struct folio *folio) { } #define FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void folio_clear_##name(struct folio *folio) { } #define __FOLIO_SET_FLAG_NOOP(name) \ static inline void __folio_set_##name(struct folio *folio) { } #define __FOLIO_CLEAR_FLAG_NOOP(name) \ static inline void __folio_clear_##name(struct folio *folio) { } #define FOLIO_TEST_SET_FLAG_FALSE(name) \ static inline bool folio_test_set_##name(struct folio *folio) \ { return false; } #define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ static inline bool folio_test_clear_##name(struct folio *folio) \ { return false; } #define FOLIO_FLAG_FALSE(name) \ FOLIO_TEST_FLAG_FALSE(name) \ FOLIO_SET_FLAG_NOOP(name) \ FOLIO_CLEAR_FLAG_NOOP(name) #define TESTPAGEFLAG_FALSE(uname, lname) \ FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ __FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) #define TESTSCFLAG_FALSE(uname, lname) \ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ /* Xen */ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND) PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND); PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND); PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) /* * Private page markings that may be used by the filesystem that owns the page * for its own purposes. * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not * available at this point. */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f)) #else PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { return folio_test_swapbacked(folio) && test_bit(PG_swapcache, const_folio_flags(folio, 0)); } static __always_inline bool PageSwapCache(const struct page *page) { return folio_test_swapcache(page_folio(page)); } SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) #ifdef CONFIG_MMU PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif #ifdef CONFIG_PAGE_IDLE_FLAG #ifdef CONFIG_64BIT FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE) FOLIO_FLAG(idle, FOLIO_HEAD_PAGE) #endif /* See page_idle.h for !64BIT workaround */ #else /* !CONFIG_PAGE_IDLE_FLAG */ FOLIO_FLAG_FALSE(young) FOLIO_TEST_CLEAR_FLAG_FALSE(young) FOLIO_FLAG_FALSE(idle) #endif /* * PageReported() is used to track reported free pages within the Buddy * allocator. We can use the non-atomic version of the test and set * operations as both should be shielded with the zone lock to prevent * any possible races on the setting or clearing of the bit. */ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) #ifdef CONFIG_MEMORY_HOTPLUG PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) #else PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif /* * On an anonymous folio mapped into a user virtual memory area, * folio->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable * page and then folio->mapping points to a struct movable_operations. * * Please note that, confusingly, "folio_mapping" refers to the inode * address_space which maps the folio from disk; whereas "folio_mapped" * refers to user virtual address space into which the folio is mapped. * * For slab pages, since slab reuses the bits in struct page to store its * internal states, the folio->mapping does not exist as such, nor do * these flags below. So in order to avoid testing non-existent bits, * please make sure that folio_test_slab(folio) actually evaluates to * false before calling the following functions (e.g., folio_test_anon). * See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) /* * Different with flags above, this flag is used only for fsdax mode. It * indicates that this page->mapping is now under reflink case. */ #define PAGE_MAPPING_DAX_SHARED ((void *)0x1) static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; } static __always_inline bool PageMappingFlags(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } static __always_inline bool folio_test_anon(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); } static __always_inline bool __folio_test_movable(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; } static __always_inline bool __PageMovable(const struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_MOVABLE; } #ifdef CONFIG_KSM /* * A KSM page is one of those write-protected "shared pages" or "merged pages" * which KSM maps into multiple mms, wherever identical anonymous page content * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ static __always_inline bool folio_test_ksm(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } static __always_inline bool PageKsm(const struct page *page) { return folio_test_ksm(page_folio(page)); } #else TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(const struct page *page); /** * folio_xor_flags_has_waiters - Change some folio flags. * @folio: The folio. * @mask: Bits set in this word will be changed. * * This must only be used for flags which are changed with the folio * lock held. For example, it is unsafe to use for PG_dirty as that * can be set without the folio lock held. It can also only be used * on flags which are in the range 0-6 as some of the implementations * only affect those bits. * * Return: Whether there are tasks waiting on the folio. */ static inline bool folio_xor_flags_has_waiters(struct folio *folio, unsigned long mask) { return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); } /** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. * * The uptodate flag is set on a folio when every byte in the folio is * at least as new as the corresponding bytes on storage. Anonymous * and CoW folios are always uptodate. If the folio is not uptodate, * some of the bytes in it may be; see the is_partially_uptodate() * address_space operation. */ static inline bool folio_test_uptodate(const struct folio *folio) { bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0)); /* * Must ensure that the data we read out of the folio is loaded * _after_ we've loaded folio->flags to check the uptodate bit. * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); return ret; } static inline bool PageUptodate(const struct page *page) { return folio_test_uptodate(page_folio(page)); } static __always_inline void __folio_mark_uptodate(struct folio *folio) { smp_wmb(); __set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void folio_mark_uptodate(struct folio *folio) { /* * Memory barrier must be issued before setting the PG_uptodate bit, * so that all previous stores issued in order to bring the folio * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); set_bit(PG_uptodate, folio_flags(folio, 0)); } static __always_inline void __SetPageUptodate(struct page *page) { __folio_mark_uptodate((struct folio *)page); } static __always_inline void SetPageUptodate(struct page *page) { folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) void __folio_start_writeback(struct folio *folio, bool keep_write); void set_page_writeback(struct page *page); #define folio_start_writeback(folio) \ __folio_start_writeback(folio, false) #define folio_start_writeback_keepwrite(folio) \ __folio_start_writeback(folio, true) static __always_inline bool folio_test_head(const struct folio *folio) { return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY)); } static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) __CLEARPAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) /** * folio_test_large() - Does this folio contain more than one page? * @folio: The folio to test. * * Return: True if the folio is larger than one page. */ static inline bool folio_test_large(const struct folio *folio) { return folio_test_head(folio); } static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } static __always_inline void clear_compound_head(struct page *page) { WRITE_ONCE(page->compound_head, 0); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { BUG_ON(!PageHead(page)); ClearPageHead(page); } FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(large_rmappable) #endif #define PG_head_mask ((1UL << PG_head)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for * normal or transparent huge pages. * * PageTransHuge() returns true for both transparent huge and * hugetlbfs pages, but not normal pages. PageTransHuge() can only be * called only in the core VM paths where hugetlbfs pages can't exist. */ static inline int PageTransHuge(const struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); } /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } /* * PageTransTail returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. */ static inline int PageTransTail(const struct page *page) { return PageTail(page); } #else TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) TESTPAGEFLAG_FALSE(TransTail, transtail) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) /* * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the * compound page. * * This flag is set by hwpoison handler. Cleared by THP split or free page. */ PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) #else PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* * For pages that are never mapped to userspace, * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ enum pagetype { PG_buddy = 0x40000000, PG_offline = 0x20000000, PG_table = 0x10000000, PG_guard = 0x08000000, PG_hugetlb = 0x04000000, PG_slab = 0x02000000, PG_zsmalloc = 0x01000000, PAGE_TYPE_BASE = 0x80000000, /* * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and * allow owners that set a type to reuse the lower 16 bit for their own * purposes. */ PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, }; #define PageType(page, flag) \ ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) #define folio_test_type(folio, flag) \ ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { return (int)page_type < PAGE_MAPCOUNT_RESERVE; } static inline int page_has_type(const struct page *page) { return page_type_has_type(READ_ONCE(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ static __always_inline bool folio_test_##fname(const struct folio *folio)\ { \ return folio_test_type(folio, PG_##lname); \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ folio->page.page_type &= ~PG_##lname; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type |= PG_##lname; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ } /* * PageBuddy() indicates that the page is free and in the buddy system * (see mm/page_alloc.c). */ PAGE_TYPE_OPS(Buddy, buddy, buddy) /* * PageOffline() indicates that the page is logically offline although the * containing section is online. (e.g. inflated in a balloon driver or * not onlined when onlining the section). * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. * * When a memory block gets onlined, all pages are initialized with a * refcount of 1 and PageOffline(). generic_online_page() will * take care of clearing PageOffline(). * * If a driver wants to allow to offline unmovable PageOffline() pages without * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() * pages (now with a reference count of zero) are treated like free (unmanaged) * pages, allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will * require not giving them to the buddy via generic_online_page(). * * Memory offlining code will not adjust the managed page count for any * PageOffline() pages, treating them like they were never exposed to the * buddy using generic_online_page(). * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random * pages should check PageOffline() and synchronize with such drivers using * page_offline_freeze()/page_offline_thaw(). */ PAGE_TYPE_OPS(Offline, offline, offline) extern void page_offline_freeze(void); extern void page_offline_thaw(void); extern void page_offline_begin(void); extern void page_offline_end(void); /* * Marks pages in use as page tables. */ PAGE_TYPE_OPS(Table, table, pgtable) /* * Marks guardpages used with debug_pagealloc. */ PAGE_TYPE_OPS(Guard, guard, guard) FOLIO_TYPE_OPS(slab, slab) /** * PageSlab - Determine if the page belongs to the slab allocator * @page: The page to test. * * Context: Any context. * Return: True for slab pages, false for any other kind of page. */ static inline bool PageSlab(const struct page *page) { return folio_test_slab(page_folio(page)); } #ifdef CONFIG_HUGETLB_PAGE FOLIO_TYPE_OPS(hugetlb, hugetlb) #else FOLIO_TEST_FLAG_FALSE(hugetlb) #endif PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. * * Context: Any context. * Return: True for hugetlbfs pages, false for anon pages or pages * belonging to other filesystems. */ static inline bool PageHuge(const struct page *page) { return folio_test_hugetlb(page_folio(page)); } /* * Check if a page is currently marked HWPoisoned. Note that this check is * best effort only and inherently racy: there is no way to synchronize with * failing hardware. */ static inline bool is_page_hwpoison(const struct page *page) { const struct folio *folio; if (PageHWPoison(page)) return true; folio = page_folio(page); return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); static __always_inline int PageAnonExclusive(const struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); /* * HugeTLB stores this information on the head page; THP keeps it per * page */ if (PageHuge(page)) page = compound_head(page); return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } #ifdef CONFIG_MMU #define __PG_MLOCKED (1UL << PG_mlocked) #else #define __PG_MLOCKED 0 #endif /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. If they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_active | \ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. * Pages being prepped should not have these flags set. If they are set, * there has been a kernel bug or struct page corruption. * * __PG_HWPOISON is exceptional because it needs to be kept beyond page's * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) /* * Flags stored in the second page of a compound page. They may overlap * the CHECK_AT_FREE flags above, so need to be cleared. */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** * page_has_private - Determine if page has private stuff * @page: The page to be checked * * Determine if a page has private stuff, indicating that release routines * should be invoked upon it. */ static inline int page_has_private(const struct page *page) { return !!(page->flags & PAGE_FLAGS_PRIVATE); } static inline bool folio_has_private(const struct folio *folio) { return page_has_private(&folio->page); } #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND #undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */
36143 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H #include <linux/uaccess.h> #include <linux/types.h> #include <asm/processor.h> #include <asm/fpu/api.h> #include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) #define XSTATE_CPUID 0x0000000d #define TILE_CPUID 0x0000001d #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 #define XSAVE_HDR_OFFSET FXSAVE_SIZE #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) #define XSAVE_ALIGNMENT 64 /* All currently supported user features */ #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_XTILE) /* * Features which are restored when returning to user space. * PKRU is not restored on return to user space because PKRU * is switched eagerly in switch_to() and flush_thread() */ #define XFEATURE_MASK_USER_RESTORE \ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) /* Features which are dynamically enabled for a process on request */ #define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER) /* * A supervisor state component may not always contain valuable information, * and its size may be huge. Saving/restoring such supervisor state components * at each context switch can cause high CPU and space overhead, which should * be avoided. Such supervisor state components should only be saved/restored * on demand. The on-demand supervisor features are set in this mask. * * Unlike the existing supported supervisor features, an independent supervisor * feature does not allocate a buffer in task->fpu, and the corresponding * supervisor state component cannot be saved/restored at each context switch. * * To support an independent supervisor feature, a developer should follow the * dos and don'ts as below: * - Do dynamically allocate a buffer for the supervisor state component. * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the * state component to/from the buffer. * - Don't set the bit corresponding to the independent supervisor feature in * IA32_XSS at run time, since it has been set at boot time. */ #define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) /* * Unsupported supervisor features. When a supervisor feature in this mask is * supported in the future, move it to the supported supervisor feature mask. */ #define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \ XFEATURE_MASK_CET_KERNEL) /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) /* * The feature mask required to restore FPU state: * - All user states which are not eagerly switched in switch_to()/exec() * - The suporvisor states */ #define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ XFEATURE_MASK_SUPERVISOR_SUPPORTED) /* * Features in this mask have space allocated in the signal frame, but may not * have that space initialized when the feature is in its init state. */ #define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \ XFEATURE_MASK_USER_DYNAMIC) extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); int xfeature_size(int xfeature_nr); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); int xfd_enable_feature(u64 xfd_err); #ifdef CONFIG_X86_64 DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); #endif #ifdef CONFIG_X86_64 DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); static __always_inline __pure bool fpu_state_size_dynamic(void) { return static_branch_unlikely(&__fpu_state_size_dynamic); } #else static __always_inline __pure bool fpu_state_size_dynamic(void) { return false; } #endif #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 /* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Manage send buffer * * Copyright IBM Corp. 2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #ifndef SMC_TX_H #define SMC_TX_H #include <linux/socket.h> #include <linux/types.h> #include "smc.h" #include "smc_cdc.h" static inline int smc_tx_prepared_sends(struct smc_connection *conn) { union smc_host_cursor sent, prep; smc_curs_copy(&sent, &conn->tx_curs_sent, conn); smc_curs_copy(&prep, &conn->tx_curs_prep, conn); return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); } void smc_tx_pending(struct smc_connection *conn); void smc_tx_work(struct work_struct *work); void smc_tx_init(struct smc_sock *smc); int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); int smc_tx_sndbuf_nonempty(struct smc_connection *conn); void smc_tx_sndbuf_nonfull(struct smc_sock *smc); void smc_tx_consumer_update(struct smc_connection *conn, bool force); int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, u32 offset, int signal); #endif /* SMC_TX_H */
6 6 6 6 6 6 6 23 23 23 2 21 21 21 2 26 26 26 26 26 3 23 6 23 23 26 10 1 5 1 1 2 7 7 1 6 6 6 4 4 6 4 1 1 3 2 5 2 4 4 7 11 4 4 4 4 4 4 9 2 2 1 2 3 2 2 10 2 1 2 3 3 2 1 1 2 9 2 2 4 2 1 1 1 1 1 1 3 3 3 3 3 3 3 1 1 1 1 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 // SPDX-License-Identifier: GPL-2.0-only /* * HWSIM IEEE 802.15.4 interface * * (C) 2018 Mojatau, Alexander Aring <aring@mojatau.com> * Copyright 2007-2012 Siemens AG * * Based on fakelb, original Written by: * Sergey Lapin <slapin@ossfans.org> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ #include <linux/module.h> #include <linux/timer.h> #include <linux/platform_device.h> #include <linux/rtnetlink.h> #include <linux/netdevice.h> #include <linux/device.h> #include <linux/spinlock.h> #include <net/ieee802154_netdev.h> #include <net/mac802154.h> #include <net/cfg802154.h> #include <net/genetlink.h> #include "mac802154_hwsim.h" MODULE_DESCRIPTION("Software simulator of IEEE 802.15.4 radio(s) for mac802154"); MODULE_LICENSE("GPL"); static LIST_HEAD(hwsim_phys); static DEFINE_MUTEX(hwsim_phys_lock); static struct platform_device *mac802154hwsim_dev; /* MAC802154_HWSIM netlink family */ static struct genl_family hwsim_genl_family; static int hwsim_radio_idx; enum hwsim_multicast_groups { HWSIM_MCGRP_CONFIG, }; static const struct genl_multicast_group hwsim_mcgrps[] = { [HWSIM_MCGRP_CONFIG] = { .name = "config", }, }; struct hwsim_pib { u8 page; u8 channel; struct ieee802154_hw_addr_filt filt; enum ieee802154_filtering_level filt_level; struct rcu_head rcu; }; struct hwsim_edge_info { u8 lqi; struct rcu_head rcu; }; struct hwsim_edge { struct hwsim_phy *endpoint; struct hwsim_edge_info __rcu *info; struct list_head list; struct rcu_head rcu; }; struct hwsim_phy { struct ieee802154_hw *hw; u32 idx; struct hwsim_pib __rcu *pib; bool suspended; struct list_head edges; struct list_head list; }; static int hwsim_add_one(struct genl_info *info, struct device *dev, bool init); static void hwsim_del(struct hwsim_phy *phy); static int hwsim_hw_ed(struct ieee802154_hw *hw, u8 *level) { *level = 0xbe; return 0; } static int hwsim_update_pib(struct ieee802154_hw *hw, u8 page, u8 channel, struct ieee802154_hw_addr_filt *filt, enum ieee802154_filtering_level filt_level) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib, *pib_old; pib = kzalloc(sizeof(*pib), GFP_ATOMIC); if (!pib) return -ENOMEM; pib_old = rtnl_dereference(phy->pib); pib->page = page; pib->channel = channel; pib->filt.short_addr = filt->short_addr; pib->filt.pan_id = filt->pan_id; pib->filt.ieee_addr = filt->ieee_addr; pib->filt.pan_coord = filt->pan_coord; pib->filt_level = filt_level; rcu_assign_pointer(phy->pib, pib); kfree_rcu(pib_old, rcu); return 0; } static int hwsim_hw_channel(struct ieee802154_hw *hw, u8 page, u8 channel) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, page, channel, &pib->filt, pib->filt_level); rcu_read_unlock(); return ret; } static int hwsim_hw_addr_filt(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, pib->page, pib->channel, filt, pib->filt_level); rcu_read_unlock(); return ret; } static void hwsim_hw_receive(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) { struct ieee802154_hdr hdr; struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; rcu_read_lock(); pib = rcu_dereference(phy->pib); if (!pskb_may_pull(skb, 3)) { dev_dbg(hw->parent, "invalid frame\n"); goto drop; } memcpy(&hdr, skb->data, 3); /* Level 4 filtering: Frame fields validity */ if (pib->filt_level == IEEE802154_FILTERING_4_FRAME_FIELDS) { /* a) Drop reserved frame types */ switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_BEACON: case IEEE802154_FC_TYPE_DATA: case IEEE802154_FC_TYPE_ACK: case IEEE802154_FC_TYPE_MAC_CMD: break; default: dev_dbg(hw->parent, "unrecognized frame type 0x%x\n", mac_cb(skb)->type); goto drop; } /* b) Drop reserved frame versions */ switch (hdr.fc.version) { case IEEE802154_2003_STD: case IEEE802154_2006_STD: case IEEE802154_STD: break; default: dev_dbg(hw->parent, "unrecognized frame version 0x%x\n", hdr.fc.version); goto drop; } /* c) PAN ID constraints */ if ((mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG || mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id && mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST)) { dev_dbg(hw->parent, "unrecognized PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } /* d1) Short address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT && mac_cb(skb)->dest.short_addr != pib->filt.short_addr && mac_cb(skb)->dest.short_addr != cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { dev_dbg(hw->parent, "unrecognized short address %04x\n", le16_to_cpu(mac_cb(skb)->dest.short_addr)); goto drop; } /* d2) Extended address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG && mac_cb(skb)->dest.extended_addr != pib->filt.ieee_addr) { dev_dbg(hw->parent, "unrecognized long address 0x%016llx\n", mac_cb(skb)->dest.extended_addr); goto drop; } /* d4) Specific PAN coordinator case (no parent) */ if ((mac_cb(skb)->type == IEEE802154_FC_TYPE_DATA || mac_cb(skb)->type == IEEE802154_FC_TYPE_MAC_CMD) && mac_cb(skb)->dest.mode == IEEE802154_ADDR_NONE) { dev_dbg(hw->parent, "relaying is not supported\n"); goto drop; } /* e) Beacon frames follow specific PAN ID rules */ if (mac_cb(skb)->type == IEEE802154_FC_TYPE_BEACON && pib->filt.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id) { dev_dbg(hw->parent, "invalid beacon PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } } rcu_read_unlock(); ieee802154_rx_irqsafe(hw, skb, lqi); return; drop: rcu_read_unlock(); kfree_skb(skb); } static int hwsim_hw_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) { struct hwsim_phy *current_phy = hw->priv; struct hwsim_pib *current_pib, *endpoint_pib; struct hwsim_edge_info *einfo; struct hwsim_edge *e; WARN_ON(current_phy->suspended); rcu_read_lock(); current_pib = rcu_dereference(current_phy->pib); list_for_each_entry_rcu(e, &current_phy->edges, list) { /* Can be changed later in rx_irqsafe, but this is only a * performance tweak. Received radio should drop the frame * in mac802154 stack anyway... so we don't need to be * 100% of locking here to check on suspended */ if (e->endpoint->suspended) continue; endpoint_pib = rcu_dereference(e->endpoint->pib); if (current_pib->page == endpoint_pib->page && current_pib->channel == endpoint_pib->channel) { struct sk_buff *newskb = pskb_copy(skb, GFP_ATOMIC); einfo = rcu_dereference(e->info); if (newskb) hwsim_hw_receive(e->endpoint->hw, newskb, einfo->lqi); } } rcu_read_unlock(); ieee802154_xmit_complete(hw, skb, false); return 0; } static int hwsim_hw_start(struct ieee802154_hw *hw) { struct hwsim_phy *phy = hw->priv; phy->suspended = false; return 0; } static void hwsim_hw_stop(struct ieee802154_hw *hw) { struct hwsim_phy *phy = hw->priv; phy->suspended = true; } static int hwsim_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) { enum ieee802154_filtering_level filt_level; struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; if (on) filt_level = IEEE802154_FILTERING_NONE; else filt_level = IEEE802154_FILTERING_4_FRAME_FIELDS; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, pib->page, pib->channel, &pib->filt, filt_level); rcu_read_unlock(); return ret; } static const struct ieee802154_ops hwsim_ops = { .owner = THIS_MODULE, .xmit_async = hwsim_hw_xmit, .ed = hwsim_hw_ed, .set_channel = hwsim_hw_channel, .start = hwsim_hw_start, .stop = hwsim_hw_stop, .set_promiscuous_mode = hwsim_set_promiscuous_mode, .set_hw_addr_filt = hwsim_hw_addr_filt, }; static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { return hwsim_add_one(info, &mac802154hwsim_dev->dev, false); } static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_phy *phy, *tmp; s64 idx = -1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]) return -EINVAL; idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) { if (idx == phy->idx) { hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); return 0; } } mutex_unlock(&hwsim_phys_lock); return -ENODEV; } static int append_radio_msg(struct sk_buff *skb, struct hwsim_phy *phy) { struct nlattr *nl_edges, *nl_edge; struct hwsim_edge_info *einfo; struct hwsim_edge *e; int ret; ret = nla_put_u32(skb, MAC802154_HWSIM_ATTR_RADIO_ID, phy->idx); if (ret < 0) return ret; rcu_read_lock(); if (list_empty(&phy->edges)) { rcu_read_unlock(); return 0; } nl_edges = nla_nest_start_noflag(skb, MAC802154_HWSIM_ATTR_RADIO_EDGES); if (!nl_edges) { rcu_read_unlock(); return -ENOBUFS; } list_for_each_entry_rcu(e, &phy->edges, list) { nl_edge = nla_nest_start_noflag(skb, MAC802154_HWSIM_ATTR_RADIO_EDGE); if (!nl_edge) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edges); return -ENOBUFS; } ret = nla_put_u32(skb, MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID, e->endpoint->idx); if (ret < 0) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edge); nla_nest_cancel(skb, nl_edges); return ret; } einfo = rcu_dereference(e->info); ret = nla_put_u8(skb, MAC802154_HWSIM_EDGE_ATTR_LQI, einfo->lqi); if (ret < 0) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edge); nla_nest_cancel(skb, nl_edges); return ret; } nla_nest_end(skb, nl_edge); } rcu_read_unlock(); nla_nest_end(skb, nl_edges); return 0; } static int hwsim_get_radio(struct sk_buff *skb, struct hwsim_phy *phy, u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; int res; hdr = genlmsg_put(skb, portid, seq, &hwsim_genl_family, flags, MAC802154_HWSIM_CMD_GET_RADIO); if (!hdr) return -EMSGSIZE; if (cb) genl_dump_check_consistent(cb, hdr); res = append_radio_msg(skb, phy); if (res < 0) goto out_err; genlmsg_end(skb, hdr); return 0; out_err: genlmsg_cancel(skb, hdr); return res; } static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_phy *phy; struct sk_buff *skb; int idx, res = -ENODEV; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]) return -EINVAL; idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); mutex_lock(&hwsim_phys_lock); list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx != idx) continue; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) { res = -ENOMEM; goto out_err; } res = hwsim_get_radio(skb, phy, info->snd_portid, info->snd_seq, NULL, 0); if (res < 0) { nlmsg_free(skb); goto out_err; } res = genlmsg_reply(skb, info); break; } out_err: mutex_unlock(&hwsim_phys_lock); return res; } static int hwsim_dump_radio_nl(struct sk_buff *skb, struct netlink_callback *cb) { int idx = cb->args[0]; struct hwsim_phy *phy; int res; mutex_lock(&hwsim_phys_lock); if (idx == hwsim_radio_idx) goto done; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx < idx) continue; res = hwsim_get_radio(skb, phy, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (res < 0) break; idx = phy->idx + 1; } cb->args[0] = idx; done: mutex_unlock(&hwsim_phys_lock); return skb->len; } /* caller need to held hwsim_phys_lock */ static struct hwsim_phy *hwsim_get_radio_by_id(uint32_t idx) { struct hwsim_phy *phy; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx == idx) return phy; } return NULL; } static const struct nla_policy hwsim_edge_policy[MAC802154_HWSIM_EDGE_ATTR_MAX + 1] = { [MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_EDGE_ATTR_LQI] = { .type = NLA_U8 }, }; static struct hwsim_edge *hwsim_alloc_edge(struct hwsim_phy *endpoint, u8 lqi) { struct hwsim_edge_info *einfo; struct hwsim_edge *e; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) return NULL; einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { kfree(e); return NULL; } einfo->lqi = 0xff; rcu_assign_pointer(e->info, einfo); e->endpoint = endpoint; return e; } static void hwsim_free_edge(struct hwsim_edge *e) { struct hwsim_edge_info *einfo; rcu_read_lock(); einfo = rcu_dereference(e->info); rcu_read_unlock(); kfree_rcu(einfo, rcu); kfree_rcu(e, rcu); } static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0, *phy_v1; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); if (v0 == v1) return -EINVAL; mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } phy_v1 = hwsim_get_radio_by_id(v1); if (!phy_v1) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { mutex_unlock(&hwsim_phys_lock); rcu_read_unlock(); return -EEXIST; } } rcu_read_unlock(); e = hwsim_alloc_edge(phy_v1, 0xff); if (!e) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } list_add_rcu(&e->list, &phy_v0->edges); /* wait until changes are done under hwsim_phys_lock lock * should prevent of calling this function twice while * edges list has not the changes yet. */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { rcu_read_unlock(); list_del_rcu(&e->list); hwsim_free_edge(e); /* same again - wait until list changes are done */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_edge_info *einfo, *einfo_old; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; u8 lqi; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] || !edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); lqi = nla_get_u8(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { einfo->lqi = lqi; einfo_old = rcu_replace_pointer(e->info, einfo, lockdep_is_held(&hwsim_phys_lock)); rcu_read_unlock(); kfree_rcu(einfo_old, rcu); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); kfree(einfo); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } /* MAC802154_HWSIM netlink policy */ static const struct nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = { [MAC802154_HWSIM_ATTR_RADIO_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_ATTR_RADIO_EDGE] = { .type = NLA_NESTED }, [MAC802154_HWSIM_ATTR_RADIO_EDGES] = { .type = NLA_NESTED }, }; /* Generic Netlink operations array */ static const struct genl_small_ops hwsim_nl_ops[] = { { .cmd = MAC802154_HWSIM_CMD_NEW_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, }; static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC802154_HWSIM", .version = 1, .maxattr = MAC802154_HWSIM_ATTR_MAX, .policy = hwsim_genl_policy, .module = THIS_MODULE, .small_ops = hwsim_nl_ops, .n_small_ops = ARRAY_SIZE(hwsim_nl_ops), .resv_start_op = MAC802154_HWSIM_CMD_NEW_EDGE + 1, .mcgrps = hwsim_mcgrps, .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb, struct genl_info *info) { if (info) genl_notify(&hwsim_genl_family, mcast_skb, info, HWSIM_MCGRP_CONFIG, GFP_KERNEL); else genlmsg_multicast(&hwsim_genl_family, mcast_skb, 0, HWSIM_MCGRP_CONFIG, GFP_KERNEL); } static void hwsim_mcast_new_radio(struct genl_info *info, struct hwsim_phy *phy) { struct sk_buff *mcast_skb; void *data; mcast_skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!mcast_skb) return; data = genlmsg_put(mcast_skb, 0, 0, &hwsim_genl_family, 0, MAC802154_HWSIM_CMD_NEW_RADIO); if (!data) goto out_err; if (append_radio_msg(mcast_skb, phy) < 0) goto out_err; genlmsg_end(mcast_skb, data); hwsim_mcast_config_msg(mcast_skb, info); return; out_err: genlmsg_cancel(mcast_skb, data); nlmsg_free(mcast_skb); } static void hwsim_edge_unsubscribe_me(struct hwsim_phy *phy) { struct hwsim_phy *tmp; struct hwsim_edge *e; rcu_read_lock(); /* going to all phy edges and remove phy from it */ list_for_each_entry(tmp, &hwsim_phys, list) { list_for_each_entry_rcu(e, &tmp->edges, list) { if (e->endpoint->idx == phy->idx) { list_del_rcu(&e->list); hwsim_free_edge(e); } } } rcu_read_unlock(); synchronize_rcu(); } static int hwsim_subscribe_all_others(struct hwsim_phy *phy) { struct hwsim_phy *sub; struct hwsim_edge *e; list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(sub, 0xff); if (!e) goto me_fail; list_add_rcu(&e->list, &phy->edges); } list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(phy, 0xff); if (!e) goto sub_fail; list_add_rcu(&e->list, &sub->edges); } return 0; sub_fail: hwsim_edge_unsubscribe_me(phy); me_fail: rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } rcu_read_unlock(); return -ENOMEM; } static int hwsim_add_one(struct genl_info *info, struct device *dev, bool init) { struct ieee802154_hw *hw; struct hwsim_phy *phy; struct hwsim_pib *pib; int idx; int err; idx = hwsim_radio_idx++; hw = ieee802154_alloc_hw(sizeof(*phy), &hwsim_ops); if (!hw) return -ENOMEM; phy = hw->priv; phy->hw = hw; /* 868 MHz BPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 1; /* 915 MHz BPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7fe; /* 2.4 GHz O-QPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7FFF800; /* 868 MHz ASK 802.15.4-2006 */ hw->phy->supported.channels[1] |= 1; /* 915 MHz ASK 802.15.4-2006 */ hw->phy->supported.channels[1] |= 0x7fe; /* 868 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 1; /* 915 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 0x7fe; /* 2.4 GHz CSS 802.15.4a-2007 */ hw->phy->supported.channels[3] |= 0x3fff; /* UWB Sub-gigahertz 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 1; /* UWB Low band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0x1e; /* UWB High band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0xffe0; /* 750 MHz O-QPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf; /* 750 MHz MPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf0; /* 950 MHz BPSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ff; /* 950 MHz GFSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ffc00; ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); /* hwsim phy channel 13 as default */ hw->phy->current_channel = 13; pib = kzalloc(sizeof(*pib), GFP_KERNEL); if (!pib) { err = -ENOMEM; goto err_pib; } pib->channel = 13; pib->filt.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); pib->filt.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); hw->flags = IEEE802154_HW_PROMISCUOUS; hw->parent = dev; err = ieee802154_register_hw(hw); if (err) goto err_reg; mutex_lock(&hwsim_phys_lock); if (init) { err = hwsim_subscribe_all_others(phy); if (err < 0) { mutex_unlock(&hwsim_phys_lock); goto err_subscribe; } } list_add_tail(&phy->list, &hwsim_phys); mutex_unlock(&hwsim_phys_lock); hwsim_mcast_new_radio(info, phy); return idx; err_subscribe: ieee802154_unregister_hw(phy->hw); err_reg: kfree(pib); err_pib: ieee802154_free_hw(phy->hw); return err; } static void hwsim_del(struct hwsim_phy *phy) { struct hwsim_pib *pib; struct hwsim_edge *e; hwsim_edge_unsubscribe_me(phy); list_del(&phy->list); rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } pib = rcu_dereference(phy->pib); rcu_read_unlock(); kfree_rcu(pib, rcu); ieee802154_unregister_hw(phy->hw); ieee802154_free_hw(phy->hw); } static int hwsim_probe(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; int err, i; for (i = 0; i < 2; i++) { err = hwsim_add_one(NULL, &pdev->dev, true); if (err < 0) goto err_slave; } dev_info(&pdev->dev, "Added 2 mac802154 hwsim hardware radios\n"); return 0; err_slave: mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); return err; } static void hwsim_remove(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); } static struct platform_driver mac802154hwsim_driver = { .probe = hwsim_probe, .remove_new = hwsim_remove, .driver = { .name = "mac802154_hwsim", }, }; static __init int hwsim_init_module(void) { int rc; rc = genl_register_family(&hwsim_genl_family); if (rc) return rc; mac802154hwsim_dev = platform_device_register_simple("mac802154_hwsim", -1, NULL, 0); if (IS_ERR(mac802154hwsim_dev)) { rc = PTR_ERR(mac802154hwsim_dev); goto platform_dev; } rc = platform_driver_register(&mac802154hwsim_driver); if (rc < 0) goto platform_drv; return 0; platform_drv: platform_device_unregister(mac802154hwsim_dev); platform_dev: genl_unregister_family(&hwsim_genl_family); return rc; } static __exit void hwsim_remove_module(void) { genl_unregister_family(&hwsim_genl_family); platform_driver_unregister(&mac802154hwsim_driver); platform_device_unregister(mac802154hwsim_dev); } module_init(hwsim_init_module); module_exit(hwsim_remove_module);
51 55 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H #include <linux/backing-dev.h> #include <linux/netfs.h> /** * enum p9_session_flags - option flags for each 9P session * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced * * Session flags reflect options selected by users at mount time */ #define V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \ V9FS_ACCESS_USER | \ V9FS_ACCESS_CLIENT) #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { V9FS_PROTO_2000U = 0x01, V9FS_PROTO_2000L = 0x02, V9FS_ACCESS_SINGLE = 0x04, V9FS_ACCESS_USER = 0x08, V9FS_ACCESS_CLIENT = 0x10, V9FS_POSIX_ACL = 0x20, V9FS_NO_XATTR = 0x40, V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */ V9FS_DIRECT_IO = 0x100, V9FS_SYNC = 0x200 }; /** * enum p9_cache_shortcuts - human readable cache preferences * @CACHE_SC_NONE: disable all caches * @CACHE_SC_READAHEAD: only provide caching for readahead * @CACHE_SC_MMAP: provide caching to enable mmap * @CACHE_SC_LOOSE: non-coherent caching for files and meta data * @CACHE_SC_FSCACHE: persistent non-coherent caching for files and meta-data * */ enum p9_cache_shortcuts { CACHE_SC_NONE = 0b00000000, CACHE_SC_READAHEAD = 0b00000001, CACHE_SC_MMAP = 0b00000101, CACHE_SC_LOOSE = 0b00001111, CACHE_SC_FSCACHE = 0b10001111, }; /** * enum p9_cache_bits - possible values of ->cache * @CACHE_NONE: caches disabled * @CACHE_FILE: file caching (open to close) * @CACHE_META: meta-data and directory caching * @CACHE_WRITEBACK: write-back caching for files * @CACHE_LOOSE: don't check cache consistency * @CACHE_FSCACHE: local persistent caches * */ enum p9_cache_bits { CACHE_NONE = 0b00000000, CACHE_FILE = 0b00000001, CACHE_META = 0b00000010, CACHE_WRITEBACK = 0b00000100, CACHE_LOOSE = 0b00001000, CACHE_FSCACHE = 0b10000000, }; /** * struct v9fs_session_info - per-instance session information * @flags: session options of type &p9_session_flags * @nodev: set to 1 to disable device mapping * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_bits * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy * @maxdata: maximum data to be sent/recvd per protocol message * @dfltuid: default numeric userid to mount hierarchy as * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . * * Bugs: there seems to be a lot of state which could be condensed and/or * removed. */ struct v9fs_session_info { /* options */ unsigned int flags; unsigned char nodev; unsigned short debug; unsigned int afid; unsigned int cache; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_volume *fscache; #endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ unsigned int maxdata; /* max data for client interface */ kuid_t dfltuid; /* default uid/muid for legacy support */ kgid_t dfltgid; /* default gid for legacy support */ kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 struct v9fs_inode { struct netfs_inode netfs; /* Netfslib context and vfs inode */ struct p9_qid qid; unsigned int cache_validity; struct mutex v_mutex; }; static inline struct v9fs_inode *V9FS_I(const struct inode *inode) { return container_of(inode, struct v9fs_inode, netfs.inode); } static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) { #ifdef CONFIG_9P_FSCACHE return netfs_i_cookie(&v9inode->netfs); #else return NULL; #endif } static inline struct fscache_volume *v9fs_session_cache(struct v9fs_session_info *v9ses) { #ifdef CONFIG_9P_FSCACHE return v9ses->fscache; #else return NULL; #endif } extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new); /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody" #define V9FS_DEFANAME "" #define V9FS_DEFUID KUIDT_INIT(-2) #define V9FS_DEFGID KGIDT_INIT(-2) static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { return inode->i_sb->s_fs_info; } static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) { return dentry->d_sb->s_fs_info; } static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; } static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000L; } /** * v9fs_get_inode_from_fid - Helper routine to populate an inode by * issuing a attribute request * @v9ses: session information * @fid: fid to issue attribute request for * @sb: superblock on which to create inode * */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, bool new) { if (v9fs_proto_dotl(v9ses)) return v9fs_fid_iget_dotl(sb, fid, new); else return v9fs_fid_iget(sb, fid, new); } #endif
35243 4252 35104 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0 #include <linux/fault-inject.h> #include <linux/error-injection.h> #include <linux/mm.h> static struct { struct fault_attr attr; bool ignore_gfp_highmem; bool ignore_gfp_reclaim; u32 min_order; } fail_page_alloc = { .attr = FAULT_ATTR_INITIALIZER, .ignore_gfp_reclaim = true, .ignore_gfp_highmem = true, .min_order = 1, }; static int __init setup_fail_page_alloc(char *str) { return setup_fault_attr(&fail_page_alloc.attr, str); } __setup("fail_page_alloc=", setup_fail_page_alloc); bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { int flags = 0; if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) return false; if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) return false; if (fail_page_alloc.ignore_gfp_reclaim && (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) flags |= FAULT_NOWARN; return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init fail_page_alloc_debugfs(void) { umode_t mode = S_IFREG | 0600; struct dentry *dir; dir = fault_create_debugfs_attr("fail_page_alloc", NULL, &fail_page_alloc.attr); debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_reclaim); debugfs_create_bool("ignore-gfp-highmem", mode, dir, &fail_page_alloc.ignore_gfp_highmem); debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order); return 0; } late_initcall(fail_page_alloc_debugfs); #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
4319 1018 7494 6325 6324 3 681 680 194 10 527 795 201 10 10 792 791 8 1 4372 1386 403 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM writeback #if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WRITEBACK_H #include <linux/tracepoint.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #define show_inode_state(state) \ __print_flags(state, "|", \ {I_DIRTY_SYNC, "I_DIRTY_SYNC"}, \ {I_DIRTY_DATASYNC, "I_DIRTY_DATASYNC"}, \ {I_DIRTY_PAGES, "I_DIRTY_PAGES"}, \ {I_NEW, "I_NEW"}, \ {I_WILL_FREE, "I_WILL_FREE"}, \ {I_FREEING, "I_FREEING"}, \ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ {I_REFERENCED, "I_REFERENCED"} \ ) /* enums need to be exported to user space */ #undef EM #undef EMe #define EM(a,b) TRACE_DEFINE_ENUM(a); #define EMe(a,b) TRACE_DEFINE_ENUM(a); #define WB_WORK_REASON \ EM( WB_REASON_BACKGROUND, "background") \ EM( WB_REASON_VMSCAN, "vmscan") \ EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EM( WB_REASON_FORKER_THREAD, "forker_thread") \ EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush") WB_WORK_REASON /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. */ #undef EM #undef EMe #define EM(a,b) { a, b }, #define EMe(a,b) { a, b } struct wb_writeback_work; DECLARE_EVENT_CLASS(writeback_folio_template, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(pgoff_t, index) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0; __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", __entry->name, (unsigned long)__entry->ino, __entry->index ) ); DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, TP_PROTO(struct folio *folio, struct address_space *mapping), TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, flags) ), TP_fast_assign( struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; ), TP_printk("bdi %s: ino=%lu state=%s flags=%s", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), show_inode_state(__entry->flags) ) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, TP_PROTO(struct inode *inode, int flags), TP_ARGS(inode, flags) ); #ifdef CREATE_TRACE_POINTS #ifdef CONFIG_CGROUP_WRITEBACK static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { return cgroup_ino(wb->memcg_css->cgroup); } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { if (wbc->wb) return __trace_wb_assign_cgroup(wbc->wb); else return 1; } #else /* CONFIG_CGROUP_WRITEBACK */ static inline ino_t __trace_wb_assign_cgroup(struct bdi_writeback *wb) { return 1; } static inline ino_t __trace_wbc_assign_cgroup(struct writeback_control *wbc) { return 1; } #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* CREATE_TRACE_POINTS */ #ifdef CONFIG_CGROUP_WRITEBACK TRACE_EVENT(inode_foreign_history, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned int history), TP_ARGS(inode, wbc, history), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, cgroup_ino) __field(unsigned int, history) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; ), TP_printk("bdi %s: ino=%lu cgroup_ino=%lu history=0x%x", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->cgroup_ino, __entry->history ) ); TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, struct bdi_writeback *new_wb), TP_ARGS(inode, old_wb, new_wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(ino_t, old_cgroup_ino) __field(ino_t, new_cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); ), TP_printk("bdi %s: ino=%lu old_cgroup_ino=%lu new_cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, (unsigned long)__entry->old_cgroup_ino, (unsigned long)__entry->new_cgroup_ino ) ); TRACE_EVENT(track_foreign_dirty, TP_PROTO(struct folio *folio, struct bdi_writeback *wb), TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) __field(u64, bdi_id) __field(ino_t, ino) __field(unsigned int, memcg_id) __field(ino_t, cgroup_ino) __field(ino_t, page_cgroup_ino) ), TP_fast_assign( struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", __entry->name, __entry->bdi_id, (unsigned long)__entry->ino, __entry->memcg_id, (unsigned long)__entry->cgroup_ino, (unsigned long)__entry->page_cgroup_ino ) ); TRACE_EVENT(flush_foreign, TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id, unsigned int frn_memcg_id), TP_ARGS(wb, frn_bdi_id, frn_memcg_id), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) __field(unsigned int, frn_bdi_id) __field(unsigned int, frn_memcg_id) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; ), TP_printk("bdi %s: cgroup_ino=%lu frn_bdi_id=%u frn_memcg_id=%u", __entry->name, (unsigned long)__entry->cgroup_ino, __entry->frn_bdi_id, __entry->frn_memcg_id ) ); #endif DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc), TP_STRUCT__entry ( __array(char, name, 32) __field(ino_t, ino) __field(int, sync_mode) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, __entry->sync_mode, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc), TP_ARGS(inode, wbc) ); DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), TP_ARGS(wb, work), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_pages) __field(dev_t, sb_dev) __field(int, sync_mode) __field(int, for_kupdate) __field(int, range_cyclic) __field(int, for_background) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; __entry->for_kupdate = work->for_kupdate; __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%lu", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, __entry->sync_mode, __entry->for_kupdate, __entry->range_cyclic, __entry->for_background, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \ TP_ARGS(wb, work)) DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); DEFINE_WRITEBACK_WORK_EVENT(writeback_written); DEFINE_WRITEBACK_WORK_EVENT(writeback_wait); TRACE_EVENT(writeback_pages_written, TP_PROTO(long pages_written), TP_ARGS(pages_written), TP_STRUCT__entry( __field(long, pages) ), TP_fast_assign( __entry->pages = pages_written; ), TP_printk("%ld", __entry->pages) ); DECLARE_EVENT_CLASS(writeback_class, TP_PROTO(struct bdi_writeback *wb), TP_ARGS(wb), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%lu", __entry->name, (unsigned long)__entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_EVENT(name) \ DEFINE_EVENT(writeback_class, name, \ TP_PROTO(struct bdi_writeback *wb), \ TP_ARGS(wb)) DEFINE_WRITEBACK_EVENT(writeback_wake_background); TRACE_EVENT(writeback_bdi_register, TP_PROTO(struct backing_dev_info *bdi), TP_ARGS(bdi), TP_STRUCT__entry( __array(char, name, 32) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name ) ); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), TP_ARGS(wbc, bdi), TP_STRUCT__entry( __array(char, name, 32) __field(long, nr_to_write) __field(long, pages_skipped) __field(int, sync_mode) __field(int, for_kupdate) __field(int, for_background) __field(int, for_reclaim) __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; __entry->for_kupdate = wbc->for_kupdate; __entry->for_background = wbc->for_background; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " "bgrd=%d reclm=%d cyclic=%d " "start=0x%lx end=0x%lx cgroup_ino=%lu", __entry->name, __entry->nr_to_write, __entry->pages_skipped, __entry->sync_mode, __entry->for_kupdate, __entry->for_background, __entry->for_reclaim, __entry->range_cyclic, __entry->range_start, __entry->range_end, (unsigned long)__entry->cgroup_ino ) ) #define DEFINE_WBC_EVENT(name) \ DEFINE_EVENT(wbc_class, name, \ TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \ TP_ARGS(wbc, bdi)) DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, unsigned long dirtied_before, int moved), TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) __field(long, age) __field(int, moved) __field(int, reason) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu", __entry->name, __entry->older, /* dirtied_before in jiffies */ __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(global_dirty_state, TP_PROTO(unsigned long background_thresh, unsigned long dirty_thresh ), TP_ARGS(background_thresh, dirty_thresh ), TP_STRUCT__entry( __field(unsigned long, nr_dirty) __field(unsigned long, nr_writeback) __field(unsigned long, background_thresh) __field(unsigned long, dirty_thresh) __field(unsigned long, dirty_limit) __field(unsigned long, nr_dirtied) __field(unsigned long, nr_written) ), TP_fast_assign( __entry->nr_dirty = global_node_page_state(NR_FILE_DIRTY); __entry->nr_writeback = global_node_page_state(NR_WRITEBACK); __entry->nr_dirtied = global_node_page_state(NR_DIRTIED); __entry->nr_written = global_node_page_state(NR_WRITTEN); __entry->background_thresh = background_thresh; __entry->dirty_thresh = dirty_thresh; __entry->dirty_limit = global_wb_domain.dirty_limit; ), TP_printk("dirty=%lu writeback=%lu " "bg_thresh=%lu thresh=%lu limit=%lu " "dirtied=%lu written=%lu", __entry->nr_dirty, __entry->nr_writeback, __entry->background_thresh, __entry->dirty_thresh, __entry->dirty_limit, __entry->nr_dirtied, __entry->nr_written ) ); #define KBps(x) ((x) << (PAGE_SHIFT - 10)) TRACE_EVENT(bdi_dirty_ratelimit, TP_PROTO(struct bdi_writeback *wb, unsigned long dirty_rate, unsigned long task_ratelimit), TP_ARGS(wb, dirty_rate, task_ratelimit), TP_STRUCT__entry( __array(char, bdi, 32) __field(unsigned long, write_bw) __field(unsigned long, avg_write_bw) __field(unsigned long, dirty_rate) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned long, balanced_dirty_ratelimit) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); __entry->dirty_ratelimit = KBps(wb->dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->balanced_dirty_ratelimit = KBps(wb->balanced_dirty_ratelimit); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "balanced_dirty_ratelimit=%lu cgroup_ino=%lu", __entry->bdi, __entry->write_bw, /* write bandwidth */ __entry->avg_write_bw, /* avg write bandwidth */ __entry->dirty_rate, /* bdi dirty rate */ __entry->dirty_ratelimit, /* base ratelimit */ __entry->task_ratelimit, /* ratelimit with position control */ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(balance_dirty_pages, TP_PROTO(struct bdi_writeback *wb, unsigned long thresh, unsigned long bg_thresh, unsigned long dirty, unsigned long bdi_thresh, unsigned long bdi_dirty, unsigned long dirty_ratelimit, unsigned long task_ratelimit, unsigned long dirtied, unsigned long period, long pause, unsigned long start_time), TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, dirty_ratelimit, task_ratelimit, dirtied, period, pause, start_time), TP_STRUCT__entry( __array( char, bdi, 32) __field(unsigned long, limit) __field(unsigned long, setpoint) __field(unsigned long, dirty) __field(unsigned long, bdi_setpoint) __field(unsigned long, bdi_dirty) __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned int, dirtied) __field(unsigned int, dirtied_pause) __field(unsigned long, paused) __field( long, pause) __field(unsigned long, period) __field( long, think) __field(ino_t, cgroup_ino) ), TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + freerun) / 2; __entry->dirty = dirty; __entry->bdi_setpoint = __entry->setpoint * bdi_thresh / (thresh + 1); __entry->bdi_dirty = bdi_dirty; __entry->dirty_ratelimit = KBps(dirty_ratelimit); __entry->task_ratelimit = KBps(task_ratelimit); __entry->dirtied = dirtied; __entry->dirtied_pause = current->nr_dirtied_pause; __entry->think = current->dirty_paused_when == 0 ? 0 : (long)(jiffies - current->dirty_paused_when) * 1000/HZ; __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "limit=%lu setpoint=%lu dirty=%lu " "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%lu", __entry->bdi, __entry->limit, __entry->setpoint, __entry->dirty, __entry->bdi_setpoint, __entry->bdi_dirty, __entry->dirty_ratelimit, __entry->task_ratelimit, __entry->dirtied, __entry->dirtied_pause, __entry->paused, /* ms */ __entry->pause, /* ms */ __entry->period, /* ms */ __entry->think, /* ms */ (unsigned long)__entry->cgroup_ino ) ); TRACE_EVENT(writeback_sb_inodes_requeue, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode)); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, (unsigned long)__entry->cgroup_ino ) ); DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write ), TP_ARGS(inode, wbc, nr_to_write), TP_STRUCT__entry( __array(char, name, 32) __field(ino_t, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) __field(ino_t, cgroup_ino) ), TP_fast_assign( strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " "index=%lu to_write=%ld wrote=%lu cgroup_ino=%lu", __entry->name, (unsigned long)__entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, __entry->writeback_index, __entry->nr_to_write, __entry->wrote, (unsigned long)__entry->cgroup_ino ) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, unsigned long nr_to_write), TP_ARGS(inode, wbc, nr_to_write) ); DECLARE_EVENT_CLASS(writeback_inode_template, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field(unsigned long, state ) __field( __u16, mode ) __field(unsigned long, dirtied_when ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->mode = inode->i_mode; __entry->dirtied_when = inode->dirtied_when; ), TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, __entry->dirtied_when, show_inode_state(__entry->state), __entry->mode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_lazytime_iput, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, writeback_dirty_inode_enqueue, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); /* * Inode writeback list tracking. */ DEFINE_EVENT(writeback_inode_template, sb_mark_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(writeback_inode_template, sb_clear_inode_writeback, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
219 220 220 220 220 220 220 361 361 361 361 361 361 359 336 30 359 25 337 337 361 220 220 220 220 360 361 361 6518 6519 6517 63 321 6423 432 9 7 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 // SPDX-License-Identifier: GPL-2.0 /* * Workingset detection * * Copyright (C) 2013 Red Hat, Inc., Johannes Weiner */ #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/shmem_fs.h> #include <linux/pagemap.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/swap.h> #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> #include "internal.h" /* * Double CLOCK lists * * Per node, two clock lists are maintained for file pages: the * inactive and the active list. Freshly faulted pages start out at * the head of the inactive list and page reclaim scans pages from the * tail. Pages that are accessed multiple times on the inactive list * are promoted to the active list, to protect them from reclaim, * whereas active pages are demoted to the inactive list when the * active list grows too big. * * fault ------------------------+ * | * +--------------+ | +-------------+ * reclaim <- | inactive | <-+-- demotion | active | <--+ * +--------------+ +-------------+ | * | | * +-------------- promotion ------------------+ * * * Access frequency and refault distance * * A workload is thrashing when its pages are frequently used but they * are evicted from the inactive list every time before another access * would have promoted them to the active list. * * In cases where the average access distance between thrashing pages * is bigger than the size of memory there is nothing that can be * done - the thrashing set could never fit into memory under any * circumstance. * * However, the average access distance could be bigger than the * inactive list, yet smaller than the size of memory. In this case, * the set could fit into memory if it weren't for the currently * active pages - which may be used more, hopefully less frequently: * * +-memory available to cache-+ * | | * +-inactive------+-active----+ * a b | c d e f g h i | J K L M N | * +---------------+-----------+ * * It is prohibitively expensive to accurately track access frequency * of pages. But a reasonable approximation can be made to measure * thrashing on the inactive list, after which refaulting pages can be * activated optimistically to compete with the existing active pages. * * Approximating inactive page access frequency - Observations: * * 1. When a page is accessed for the first time, it is added to the * head of the inactive list, slides every existing inactive page * towards the tail by one slot, and pushes the current tail page * out of memory. * * 2. When a page is accessed for the second time, it is promoted to * the active list, shrinking the inactive list by one slot. This * also slides all inactive pages that were faulted into the cache * more recently than the activated page towards the tail of the * inactive list. * * Thus: * * 1. The sum of evictions and activations between any two points in * time indicate the minimum number of inactive pages accessed in * between. * * 2. Moving one inactive page N page slots towards the tail of the * list requires at least N inactive page accesses. * * Combining these: * * 1. When a page is finally evicted from memory, the number of * inactive pages accessed while the page was in cache is at least * the number of page slots on the inactive list. * * 2. In addition, measuring the sum of evictions and activations (E) * at the time of a page's eviction, and comparing it to another * reading (R) at the time the page faults back into memory tells * the minimum number of accesses while the page was not cached. * This is called the refault distance. * * Because the first access of the page was the fault and the second * access the refault, we combine the in-cache distance with the * out-of-cache distance to get the complete minimum access distance * of this page: * * NR_inactive + (R - E) * * And knowing the minimum access distance of a page, we can easily * tell if the page would be able to stay in cache assuming all page * slots in the cache were available: * * NR_inactive + (R - E) <= NR_inactive + NR_active * * If we have swap we should consider about NR_inactive_anon and * NR_active_anon, so for page cache and anonymous respectively: * * NR_inactive_file + (R - E) <= NR_inactive_file + NR_active_file * + NR_inactive_anon + NR_active_anon * * NR_inactive_anon + (R - E) <= NR_inactive_anon + NR_active_anon * + NR_inactive_file + NR_active_file * * Which can be further simplified to: * * (R - E) <= NR_active_file + NR_inactive_anon + NR_active_anon * * (R - E) <= NR_active_anon + NR_inactive_file + NR_active_file * * Put into words, the refault distance (out-of-cache) can be seen as * a deficit in inactive list space (in-cache). If the inactive list * had (R - E) more page slots, the page would not have been evicted * in between accesses, but activated instead. And on a full system, * the only thing eating into inactive list space is active pages. * * * Refaulting inactive pages * * All that is known about the active list is that the pages have been * accessed more than once in the past. This means that at any given * time there is actually a good chance that pages on the active list * are no longer in active use. * * So when a refault distance of (R - E) is observed and there are at * least (R - E) pages in the userspace workingset, the refaulting page * is activated optimistically in the hope that (R - E) pages are actually * used less frequently than the refaulting page - or even not used at * all anymore. * * That means if inactive cache is refaulting with a suitable refault * distance, we assume the cache workingset is transitioning and put * pressure on the current workingset. * * If this is wrong and demotion kicks in, the pages which are truly * used more frequently will be reactivated while the less frequently * used once will be evicted from memory. * * But if this is right, the stale pages will be pushed out of memory * and the used pages get to stay in cache. * * Refaulting active pages * * If on the other hand the refaulting pages have recently been * deactivated, it means that the active list is no longer protecting * actively used cache from reclaim. The cache is NOT transitioning to * a different workingset; the existing workingset is thrashing in the * space allocated to the page cache. * * * Implementation * * For each node's LRU lists, a counter for inactive evictions and * activations is maintained (node->nonresident_age). * * On eviction, a snapshot of this counter (along with some bits to * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible * refault distance will immediately activate the refaulting page. */ #define WORKINGSET_SHIFT 1 #define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ WORKINGSET_SHIFT + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) /* * Eviction timestamps need to be able to cover the full range of * actionable refaults. However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group * evictions into coarser buckets by shaving off lower timestamp bits. */ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; eviction = (eviction << WORKINGSET_SHIFT) | workingset; return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp, bool *workingsetp) { unsigned long entry = xa_to_value(shadow); int memcgid, nid; bool workingset; workingset = entry & ((1UL << WORKINGSET_SHIFT) - 1); entry >>= WORKINGSET_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); entry >>= MEM_CGROUP_ID_SHIFT; *memcgidp = memcgid; *pgdat = NODE_DATA(nid); *evictionp = entry; *workingsetp = workingset; } #ifdef CONFIG_LRU_GEN static void *lru_gen_eviction(struct folio *folio) { int hist; unsigned long token; unsigned long min_seq; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); lruvec = mem_cgroup_lruvec(memcg, pgdat); lrugen = &lruvec->lrugen; min_seq = READ_ONCE(lrugen->min_seq[type]); token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0); hist = lru_hist_from_seq(min_seq); atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs); } /* * Tests if the shadow entry is for a folio that was recently evicted. * Fills in @lruvec, @token, @workingset with the values unpacked from shadow. */ static bool lru_gen_test_recent(void *shadow, bool file, struct lruvec **lruvec, unsigned long *token, bool *workingset) { int memcg_id; unsigned long min_seq; struct mem_cgroup *memcg; struct pglist_data *pgdat; unpack_shadow(shadow, &memcg_id, &pgdat, token, workingset); memcg = mem_cgroup_from_id(memcg_id); *lruvec = mem_cgroup_lruvec(memcg, pgdat); min_seq = READ_ONCE((*lruvec)->lrugen.min_seq[file]); return (*token >> LRU_REFS_WIDTH) == (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)); } static void lru_gen_refault(struct folio *folio, void *shadow) { bool recent; int hist, tier, refs; bool workingset; unsigned long token; struct lruvec *lruvec; struct lru_gen_folio *lrugen; int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); rcu_read_lock(); recent = lru_gen_test_recent(shadow, type, &lruvec, &token, &workingset); if (lruvec != folio_lruvec(folio)) goto unlock; mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); if (!recent) goto unlock; lrugen = &lruvec->lrugen; hist = lru_hist_from_seq(READ_ONCE(lrugen->min_seq[type])); /* see the comment in folio_lru_refs() */ refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + workingset; tier = lru_tier_from_refs(refs); atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); /* * Count the following two cases as stalls: * 1. For pages accessed through page tables, hotter pages pushed out * hot pages which refaulted immediately. * 2. For pages accessed multiple times through file descriptors, * they would have been protected by sort_folio(). */ if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } unlock: rcu_read_unlock(); } #else /* !CONFIG_LRU_GEN */ static void *lru_gen_eviction(struct folio *folio) { return NULL; } static bool lru_gen_test_recent(void *shadow, bool file, struct lruvec **lruvec, unsigned long *token, bool *workingset) { return false; } static void lru_gen_refault(struct folio *folio, void *shadow) { } #endif /* CONFIG_LRU_GEN */ /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged * @nr_pages: the number of pages to count * * As in-memory pages are aged, non-resident pages need to be aged as * well, in order for the refault distances later on to be comparable * to the in-memory dimensions. This function allows reclaim and LRU * operations to drive the non-resident aging along in parallel. */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages) { /* * Reclaiming a cgroup means reclaiming all its children in a * round-robin fashion. That means that each cgroup has an LRU * order that is composed of the LRU orders of its child * cgroups; and every page has an LRU position not just in the * cgroup that owns it, but in all of that group's ancestors. * * So when the physical inactive list of a leaf cgroup ages, * the virtual inactive lists of all its parents, including * the root cgroup's, age as well. */ do { atomic_long_add(nr_pages, &lruvec->nonresident_age); } while ((lruvec = parent_lruvec(lruvec))); } /** * workingset_eviction - note the eviction of a folio from memory * @target_memcg: the cgroup that is causing the reclaim * @folio: the folio being evicted * * Return: a shadow entry to be stored in @folio->mapping->i_pages in place * of the evicted @folio so that a later refault can be detected. */ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) { struct pglist_data *pgdat = folio_pgdat(folio); unsigned long eviction; struct lruvec *lruvec; int memcgid; /* Folio is fully exclusive and pins folio's memory cgroup pointer */ VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (lru_gen_enabled()) return lru_gen_eviction(folio); lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); eviction >>= bucket_order; workingset_age_nonresident(lruvec, folio_nr_pages(folio)); return pack_shadow(memcgid, pgdat, eviction, folio_test_workingset(folio)); } /** * workingset_test_recent - tests if the shadow entry is for a folio that was * recently evicted. Also fills in @workingset with the value unpacked from * shadow. * @shadow: the shadow entry to be tested. * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. * @flush: whether to flush cgroup rstat. * * Return: true if the shadow is for a recently evicted folio; false otherwise. */ bool workingset_test_recent(void *shadow, bool file, bool *workingset, bool flush) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; unsigned long workingset_size; unsigned long refault; int memcgid; struct pglist_data *pgdat; unsigned long eviction; rcu_read_lock(); if (lru_gen_enabled()) { bool recent = lru_gen_test_recent(shadow, file, &eviction_lruvec, &eviction, workingset); rcu_read_unlock(); return recent; } unpack_shadow(shadow, &memcgid, &pgdat, &eviction, workingset); eviction <<= bucket_order; /* * Look up the memcg associated with the stored ID. It might * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging * algorithm's job to shake out the minimum access frequency * for the active cache. * * XXX: On !CONFIG_MEMCG, this will always return NULL; it * would be better if the root_mem_cgroup existed in all * configurations instead. */ eviction_memcg = mem_cgroup_from_id(memcgid); if (!mem_cgroup_disabled() && (!eviction_memcg || !mem_cgroup_tryget(eviction_memcg))) { rcu_read_unlock(); return false; } rcu_read_unlock(); /* * Flush stats (and potentially sleep) outside the RCU read section. * * Note that workingset_test_recent() itself might be called in RCU read * section (for e.g, in cachestat) - these callers need to skip flushing * stats (via the flush argument). * * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ if (flush) mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); /* * Calculate the refault distance * * The unsigned subtraction here gives an accurate distance * across nonresident_age overflows in most cases. There is a * special case: usually, shadow entries have a short lifetime * and are either refaulted or reclaimed along with the inode * before they get too old. But it is not impossible for the * nonresident_age to lap a shadow entry in the field, which * can then result in a false small refault distance, leading * to a false activation should this old entry actually * refault again. However, earlier kernels used to deactivate * unconditionally with *every* reclaim invocation for the * longest time, so the occasional inappropriate activation * leading to pressure on the active list is not a problem. */ refault_distance = (refault - eviction) & EVICTION_MASK; /* * Compare the distance to the existing workingset size. We * don't activate pages that couldn't stay resident even if * all the memory was available to the workingset. Whether * workingset competition needs to consider anon or not depends * on having free swap space. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_FILE); } if (mem_cgroup_get_nr_swap_pages(eviction_memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); if (file) { workingset_size += lruvec_page_state(eviction_lruvec, NR_INACTIVE_ANON); } } mem_cgroup_put(eviction_memcg); return refault_distance <= workingset_size; } /** * workingset_refault - Evaluate the refault of a previously evicted folio. * @folio: The freshly allocated replacement folio. * @shadow: Shadow entry of the evicted folio. * * Calculates and evaluates the refault distance of the previously * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. */ void workingset_refault(struct folio *folio, void *shadow) { bool file = folio_is_file_lru(folio); struct pglist_data *pgdat; struct mem_cgroup *memcg; struct lruvec *lruvec; bool workingset; long nr; if (lru_gen_enabled()) { lru_gen_refault(folio, shadow); return; } /* * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order * during folio reclaim is being determined. * * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. Make sure the folio is * locked to guarantee folio_memcg() stability throughout. */ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); nr = folio_nr_pages(folio); memcg = folio_memcg(folio); pgdat = folio_pgdat(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); if (!workingset_test_recent(shadow, file, &workingset, true)) return; folio_set_active(folio); workingset_age_nonresident(lruvec, nr); mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr); /* Folio was active prior to eviction */ if (workingset) { folio_set_workingset(folio); /* * XXX: Move to folio_add_lru() when it supports new vs * putback */ lru_note_cost_refault(folio); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr); } } /** * workingset_activation - note a page activation * @folio: Folio that is being activated. */ void workingset_activation(struct folio *folio) { struct mem_cgroup *memcg; rcu_read_lock(); /* * Filter non-memcg pages here, e.g. unmap can call * mark_page_accessed() on VDSO pages. * * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); out: rcu_read_unlock(); } /* * Shadow entries reflect the share of the working set that does not * fit into memory, so their number depends on the access pattern of * the workload. In most cases, they will refault or get reclaimed * along with the inode, but a (malicious) workload that streams * through files with a total size several times that of available * memory, while preventing the inodes from being reclaimed, can * create excessive amounts of shadow nodes. To keep a lid on this, * track shadow nodes and reclaim them when they grow way past the * point where they would still be useful. */ struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { struct address_space *mapping; struct page *page = virt_to_page(node); /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. * * Avoid acquiring the list_lru lock when the nodes are * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ mapping = container_of(node->array, struct address_space, i_pages); lockdep_assert_held(&mapping->i_pages.xa_lock); if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { list_lru_add_obj(&shadow_nodes, &node->private_list); __inc_node_page_state(page, WORKINGSET_NODES); } } else { if (!list_empty(&node->private_list)) { list_lru_del_obj(&shadow_nodes, &node->private_list); __dec_node_page_state(page, WORKINGSET_NODES); } } } static unsigned long count_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long max_nodes; unsigned long nodes; unsigned long pages; nodes = list_lru_shrink_count(&shadow_nodes, sc); if (!nodes) return SHRINK_EMPTY; /* * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. * * The size of the active list converges toward 100% of * overall page cache as memory grows, with only a tiny * inactive list. Assume the total cache size for that. * * Nodes might be sparsely populated, with only one shadow * entry in the extreme case. Obviously, we cannot keep one * node for every eligible shadow entry, so compromise on a * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ #ifdef CONFIG_MEMCG if (sc->memcg) { struct lruvec *lruvec; int i; mem_cgroup_flush_stats_ratelimited(sc->memcg); lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid)); for (pages = 0, i = 0; i < NR_LRU_LISTS; i++) pages += lruvec_page_state_local(lruvec, NR_LRU_BASE + i); pages += lruvec_page_state_local( lruvec, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT; pages += lruvec_page_state_local( lruvec, NR_SLAB_UNRECLAIMABLE_B) >> PAGE_SHIFT; } else #endif pages = node_present_pages(sc->nid); max_nodes = pages >> (XA_CHUNK_SHIFT - 3); if (nodes <= max_nodes) return 0; return nodes - max_nodes; } static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); struct address_space *mapping; int ret; /* * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any * address_space that has nodes on the LRU. * * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the lru_lock. */ mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { spin_unlock_irq(lru_lock); ret = LRU_RETRY; goto out; } /* For page cache we need to hold i_lock */ if (mapping->host != NULL) { if (!spin_trylock(&mapping->host->i_lock)) { xa_unlock(&mapping->i_pages); spin_unlock_irq(lru_lock); ret = LRU_RETRY; goto out; } } list_lru_isolate(lru, item); __dec_node_page_state(virt_to_page(node), WORKINGSET_NODES); spin_unlock(lru_lock); /* * The nodes should only contain one or more shadow entries, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. */ if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; xa_delete_node(node, workingset_update_node); __inc_lruvec_kmem_state(node, WORKINGSET_NODERECLAIM); out_invalid: xa_unlock_irq(&mapping->i_pages); if (mapping->host != NULL) { if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } ret = LRU_REMOVED_RETRY; out: cond_resched(); spin_lock_irq(lru_lock); return ret; } static unsigned long scan_shadow_nodes(struct shrinker *shrinker, struct shrink_control *sc) { /* list_lru lock nests inside the IRQ-safe i_pages lock */ return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate, NULL); } /* * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe * i_pages lock. */ static struct lock_class_key shadow_nodes_key; static int __init workingset_init(void) { struct shrinker *workingset_shadow_shrinker; unsigned int timestamp_bits; unsigned int max_order; int ret = -ENOMEM; BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); /* * Calculate the eviction bucket size to cover the longest * actionable refault distance, which is currently half of * memory (totalram_pages/2). However, memory hotplug may add * some more pages at runtime, so keep working with up to * double the initial memory by using totalram_pages as-is. */ timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; max_order = fls_long(totalram_pages() - 1); if (max_order > timestamp_bits) bucket_order = max_order - timestamp_bits; pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); workingset_shadow_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-shadow"); if (!workingset_shadow_shrinker) goto err; ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key, workingset_shadow_shrinker); if (ret) goto err_list_lru; workingset_shadow_shrinker->count_objects = count_shadow_nodes; workingset_shadow_shrinker->scan_objects = scan_shadow_nodes; /* ->count reports only fully expendable nodes */ workingset_shadow_shrinker->seeks = 0; shrinker_register(workingset_shadow_shrinker); return 0; err_list_lru: shrinker_free(workingset_shadow_shrinker); err: return ret; } module_init(workingset_init);
3 44 57 148 1 41 2 8 9 41 41 14 30 15 15 41 41 17 5 2 3 7 3 4 2 3 2 3 1 2 2 8 3 3 3 3 6 153 6 149 51 51 96 2 11 26 26 61 51 44 44 55 55 2 55 1 55 61 51 61 21 61 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/msdos.c * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug * in the early extended-partition checks and added DM partitions * * Support for DiskManager v6.0x added by Mark Lord, * with information provided by OnTrack. This now works for linux fdisk * and LILO, as well as loadlin and bootln. Note that disks other than * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1). * * More flexible handling of extended partitions - aeb, 950831 * * Check partition table on IDE disks for common CHS translations * * Re-organised Feb 1998 Russell King * * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il> * updated by Marc Espie <Marc.Espie@openbsd.org> * * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl> * and Krzysztof G. Baranowski <kgb@knm.org.pl> */ #include <linux/msdos_fs.h> #include <linux/msdos_partition.h> #include "check.h" #include "efi.h" /* * Many architectures don't like unaligned accesses, while * the nr_sects and start_sect partition table entries are * at a 2 (mod 4) address. */ #include <asm/unaligned.h> static inline sector_t nr_sects(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->nr_sects); } static inline sector_t start_sect(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->start_sect); } static inline int is_extended_partition(struct msdos_partition *p) { return (p->sys_ind == DOS_EXTENDED_PARTITION || p->sys_ind == WIN98_EXTENDED_PARTITION || p->sys_ind == LINUX_EXTENDED_PARTITION); } #define MSDOS_LABEL_MAGIC1 0x55 #define MSDOS_LABEL_MAGIC2 0xAA static inline int msdos_magic_present(unsigned char *p) { return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); } /* Value is EBCDIC 'IBMA' */ #define AIX_LABEL_MAGIC1 0xC9 #define AIX_LABEL_MAGIC2 0xC2 #define AIX_LABEL_MAGIC3 0xD4 #define AIX_LABEL_MAGIC4 0xC1 static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) { struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be); Sector sect; unsigned char *d; int slot, ret = 0; if (!(p[0] == AIX_LABEL_MAGIC1 && p[1] == AIX_LABEL_MAGIC2 && p[2] == AIX_LABEL_MAGIC3 && p[3] == AIX_LABEL_MAGIC4)) return 0; /* * Assume the partition table is valid if Linux partitions exists. * Note that old Solaris/x86 partitions use the same indicator as * Linux swap partitions, so we consider that a Linux partition as * well. */ for (slot = 1; slot <= 4; slot++, pt++) { if (pt->sys_ind == SOLARIS_X86_PARTITION || pt->sys_ind == LINUX_RAID_PARTITION || pt->sys_ind == LINUX_DATA_PARTITION || pt->sys_ind == LINUX_LVM_PARTITION || is_extended_partition(pt)) return 0; } d = read_part_sector(state, 7, &sect); if (d) { if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') ret = 1; put_dev_sector(sect); } return ret; } static void set_info(struct parsed_partitions *state, int slot, u32 disksig) { struct partition_meta_info *info = &state->parts[slot].info; snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig, slot); info->volname[0] = 0; state->parts[slot].has_info = true; } /* * Create devices for each logical partition in an extended partition. * The logical partitions form a linked list, with each entry being * a partition table with two entries. The first entry * is the real data partition (with a start relative to the partition * table start). The second is a pointer to the next logical partition * (with a start relative to the entire extended partition). * We do not create a Linux partition for the partition tables, but * only for the actual data partitions. */ static void parse_extended(struct parsed_partitions *state, sector_t first_sector, sector_t first_size, u32 disksig) { struct msdos_partition *p; Sector sect; unsigned char *data; sector_t this_sector, this_size; sector_t sector_size; int loopct = 0; /* number of links followed without finding a data partition */ int i; sector_size = queue_logical_block_size(state->disk->queue) / 512; this_sector = first_sector; this_size = first_size; while (1) { if (++loopct > 100) return; if (state->next == state->limit) return; data = read_part_sector(state, this_sector, &sect); if (!data) return; if (!msdos_magic_present(data + 510)) goto done; p = (struct msdos_partition *) (data + 0x1be); /* * Usually, the first entry is the real data partition, * the 2nd entry is the next extended partition, or empty, * and the 3rd and 4th entries are unused. * However, DRDOS sometimes has the extended partition as * the first entry (when the data partition is empty), * and OS/2 seems to use all four entries. */ /* * First process the data partition(s) */ for (i = 0; i < 4; i++, p++) { sector_t offs, size, next; if (!nr_sects(p) || is_extended_partition(p)) continue; /* Check the 3rd and 4th entries - these sometimes contain random garbage */ offs = start_sect(p)*sector_size; size = nr_sects(p)*sector_size; next = this_sector + offs; if (i >= 2) { if (offs + size > this_size) continue; if (next < first_sector) continue; if (next + size > first_sector + first_size) continue; } put_partition(state, state->next, next, size); set_info(state, state->next, disksig); if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[state->next].flags = ADDPART_FLAG_RAID; loopct = 0; if (++state->next == state->limit) goto done; } /* * Next, process the (first) extended partition, if present. * (So far, there seems to be no reason to make * parse_extended() recursive and allow a tree * of extended partitions.) * It should be a link to the next logical partition. */ p -= 4; for (i = 0; i < 4; i++, p++) if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) goto done; /* nothing left to do */ this_sector = first_sector + start_sect(p) * sector_size; this_size = nr_sects(p) * sector_size; put_dev_sector(sect); } done: put_dev_sector(sect); } #define SOLARIS_X86_NUMSLICE 16 #define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) struct solaris_x86_slice { __le16 s_tag; /* ID tag of partition */ __le16 s_flag; /* permission flags */ __le32 s_start; /* start sector no of partition */ __le32 s_size; /* # of blocks in partition */ }; struct solaris_x86_vtoc { unsigned int v_bootinfo[3]; /* info needed by mboot */ __le32 v_sanity; /* to verify vtoc sanity */ __le32 v_version; /* layout version */ char v_volume[8]; /* volume name */ __le16 v_sectorsz; /* sector size in bytes */ __le16 v_nparts; /* number of partitions */ unsigned int v_reserved[10]; /* free space */ struct solaris_x86_slice v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */ char v_asciilabel[128]; /* for compatibility */ }; /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also indicates linux swap. Be careful before believing this is Solaris. */ static void parse_solaris_x86(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_SOLARIS_X86_PARTITION Sector sect; struct solaris_x86_vtoc *v; int i; short max_nparts; v = read_part_sector(state, offset + 1, &sect); if (!v) return; if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { put_dev_sector(sect); return; } { char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); } if (le32_to_cpu(v->v_version) != 1) { char tmp[64]; snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", le32_to_cpu(v->v_version)); strlcat(state->pp_buf, tmp, PAGE_SIZE); put_dev_sector(sect); return; } /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; for (i = 0; i < max_nparts && state->next < state->limit; i++) { struct solaris_x86_slice *s = &v->v_slice[i]; char tmp[3 + 10 + 1 + 1]; if (s->s_size == 0) continue; snprintf(tmp, sizeof(tmp), " [s%d]", i); strlcat(state->pp_buf, tmp, PAGE_SIZE); /* solaris partitions are relative to current MS-DOS * one; must add the offset of the current partition */ put_partition(state, state->next++, le32_to_cpu(s->s_start)+offset, le32_to_cpu(s->s_size)); } put_dev_sector(sect); strlcat(state->pp_buf, " >\n", PAGE_SIZE); #endif } /* check against BSD src/sys/sys/disklabel.h for consistency */ #define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ #define BSD_MAXPARTITIONS 16 #define OPENBSD_MAXPARTITIONS 16 #define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ struct bsd_disklabel { __le32 d_magic; /* the magic number */ __s16 d_type; /* drive type */ __s16 d_subtype; /* controller/d_type specific */ char d_typename[16]; /* type name, e.g. "eagle" */ char d_packname[16]; /* pack identifier */ __u32 d_secsize; /* # of bytes per sector */ __u32 d_nsectors; /* # of data sectors per track */ __u32 d_ntracks; /* # of tracks per cylinder */ __u32 d_ncylinders; /* # of data cylinders per unit */ __u32 d_secpercyl; /* # of data sectors per cylinder */ __u32 d_secperunit; /* # of data sectors per unit */ __u16 d_sparespertrack; /* # of spare sectors per track */ __u16 d_sparespercyl; /* # of spare sectors per cylinder */ __u32 d_acylinders; /* # of alt. cylinders per unit */ __u16 d_rpm; /* rotational speed */ __u16 d_interleave; /* hardware sector interleave */ __u16 d_trackskew; /* sector 0 skew, per track */ __u16 d_cylskew; /* sector 0 skew, per cylinder */ __u32 d_headswitch; /* head switch time, usec */ __u32 d_trkseek; /* track-to-track seek, usec */ __u32 d_flags; /* generic flags */ #define NDDATA 5 __u32 d_drivedata[NDDATA]; /* drive-type specific information */ #define NSPARE 5 __u32 d_spare[NSPARE]; /* reserved for future use */ __le32 d_magic2; /* the magic number (again) */ __le16 d_checksum; /* xor of data incl. partitions */ /* filesystem and partition information: */ __le16 d_npartitions; /* number of partitions in following */ __le32 d_bbsize; /* size of boot area at sn0, bytes */ __le32 d_sbsize; /* max size of fs superblock, bytes */ struct bsd_partition { /* the partition table */ __le32 p_size; /* number of sectors in partition */ __le32 p_offset; /* starting sector */ __le32 p_fsize; /* filesystem basic fragment size */ __u8 p_fstype; /* filesystem type, see below */ __u8 p_frag; /* filesystem fragments per block */ __le16 p_cpg; /* filesystem cylinders per group */ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ }; #if defined(CONFIG_BSD_DISKLABEL) /* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ static void parse_bsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin, char *flavour, int max_partitions) { Sector sect; struct bsd_disklabel *l; struct bsd_partition *p; char tmp[64]; l = read_part_sector(state, offset + 1, &sect); if (!l) return; if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { put_dev_sector(sect); return; } snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); strlcat(state->pp_buf, tmp, PAGE_SIZE); if (le16_to_cpu(l->d_npartitions) < max_partitions) max_partitions = le16_to_cpu(l->d_npartitions); for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { sector_t bsd_start, bsd_size; if (state->next == state->limit) break; if (p->p_fstype == BSD_FS_UNUSED) continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); /* FreeBSD has relative offset if C partition offset is zero */ if (memcmp(flavour, "bsd\0", 4) == 0 && le32_to_cpu(l->d_partitions[2].p_offset) == 0) bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; if (offset > bsd_start || offset+size < bsd_start+bsd_size) { strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); continue; } put_partition(state, state->next++, bsd_start, bsd_size); } put_dev_sector(sect); if (le16_to_cpu(l->d_npartitions) > max_partitions) { snprintf(tmp, sizeof(tmp), " (ignored %d more)", le16_to_cpu(l->d_npartitions) - max_partitions); strlcat(state->pp_buf, tmp, PAGE_SIZE); } strlcat(state->pp_buf, " >\n", PAGE_SIZE); } #endif static void parse_freebsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); #endif } static void parse_netbsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); #endif } static void parse_openbsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "openbsd", OPENBSD_MAXPARTITIONS); #endif } #define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ #define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ #define UNIXWARE_NUMSLICE 16 #define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ struct unixware_slice { __le16 s_label; /* label */ __le16 s_flags; /* permission flags */ __le32 start_sect; /* starting sector */ __le32 nr_sects; /* number of sectors in slice */ }; struct unixware_disklabel { __le32 d_type; /* drive type */ __le32 d_magic; /* the magic number */ __le32 d_version; /* version number */ char d_serial[12]; /* serial number of the device */ __le32 d_ncylinders; /* # of data cylinders per device */ __le32 d_ntracks; /* # of tracks per cylinder */ __le32 d_nsectors; /* # of data sectors per track */ __le32 d_secsize; /* # of bytes per sector */ __le32 d_part_start; /* # of first sector of this partition*/ __le32 d_unknown1[12]; /* ? */ __le32 d_alt_tbl; /* byte offset of alternate table */ __le32 d_alt_len; /* byte length of alternate table */ __le32 d_phys_cyl; /* # of physical cylinders per device */ __le32 d_phys_trk; /* # of physical tracks per cylinder */ __le32 d_phys_sec; /* # of physical sectors per track */ __le32 d_phys_bytes; /* # of physical bytes per sector */ __le32 d_unknown2; /* ? */ __le32 d_unknown3; /* ? */ __le32 d_pad[8]; /* pad */ struct unixware_vtoc { __le32 v_magic; /* the magic number */ __le32 v_version; /* version number */ char v_name[8]; /* volume name */ __le16 v_nslices; /* # of slices */ __le16 v_unknown1; /* ? */ __le32 v_reserved[10]; /* reserved */ struct unixware_slice v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ } vtoc; }; /* 408 */ /* * Create devices for Unixware partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. */ static void parse_unixware(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_UNIXWARE_DISKLABEL Sector sect; struct unixware_disklabel *l; struct unixware_slice *p; l = read_part_sector(state, offset + 29, &sect); if (!l) return; if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) { put_dev_sector(sect); return; } { char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); } p = &l->vtoc.v_slice[1]; /* I omit the 0th slice as it is the same as whole disk. */ while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { if (state->next == state->limit) break; if (p->s_label != UNIXWARE_FS_UNUSED) put_partition(state, state->next++, le32_to_cpu(p->start_sect), le32_to_cpu(p->nr_sects)); p++; } put_dev_sector(sect); strlcat(state->pp_buf, " >\n", PAGE_SIZE); #endif } #define MINIX_NR_SUBPARTITIONS 4 /* * Minix 2.0.0/2.0.2 subpartition support. * Anand Krishnamurthy <anandk@wiproge.med.ge.com> * Rajeev V. Pillai <rajeevvp@yahoo.com> */ static void parse_minix(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_MINIX_SUBPARTITION Sector sect; unsigned char *data; struct msdos_partition *p; int i; data = read_part_sector(state, offset, &sect); if (!data) return; p = (struct msdos_partition *)(data + 0x1be); /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or * the normal boot sector. */ if (msdos_magic_present(data + 510) && p->sys_ind == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { if (state->next == state->limit) break; /* add each partition in use */ if (p->sys_ind == MINIX_PARTITION) put_partition(state, state->next++, start_sect(p), nr_sects(p)); } strlcat(state->pp_buf, " >\n", PAGE_SIZE); } put_dev_sector(sect); #endif /* CONFIG_MINIX_SUBPARTITION */ } static struct { unsigned char id; void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); } subtypes[] = { {FREEBSD_PARTITION, parse_freebsd}, {NETBSD_PARTITION, parse_netbsd}, {OPENBSD_PARTITION, parse_openbsd}, {MINIX_PARTITION, parse_minix}, {UNIXWARE_PARTITION, parse_unixware}, {SOLARIS_X86_PARTITION, parse_solaris_x86}, {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86}, {0, NULL}, }; int msdos_partition(struct parsed_partitions *state) { sector_t sector_size; Sector sect; unsigned char *data; struct msdos_partition *p; struct fat_boot_sector *fb; int slot; u32 disksig; sector_size = queue_logical_block_size(state->disk->queue) / 512; data = read_part_sector(state, 0, &sect); if (!data) return -1; /* * Note order! (some AIX disks, e.g. unbootable kind, * have no MSDOS 55aa) */ if (aix_magic_present(state, data)) { put_dev_sector(sect); #ifdef CONFIG_AIX_PARTITION return aix_partition(state); #else strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); return 0; #endif } if (!msdos_magic_present(data + 510)) { put_dev_sector(sect); return 0; } /* * Now that the 55aa signature is present, this is probably * either the boot sector of a FAT filesystem or a DOS-type * partition table. Reject this in case the boot indicator * is not 0 or 0x80. */ p = (struct msdos_partition *) (data + 0x1be); for (slot = 1; slot <= 4; slot++, p++) { if (p->boot_ind != 0 && p->boot_ind != 0x80) { /* * Even without a valid boot indicator value * its still possible this is valid FAT filesystem * without a partition table. */ fb = (struct fat_boot_sector *) data; if (slot == 1 && fb->reserved && fb->fats && fat_valid_media(fb->media)) { strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; } else { put_dev_sector(sect); return 0; } } } #ifdef CONFIG_EFI_PARTITION p = (struct msdos_partition *) (data + 0x1be); for (slot = 1 ; slot <= 4 ; slot++, p++) { /* If this is an EFI GPT disk, msdos should ignore it. */ if (p->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT) { put_dev_sector(sect); return 0; } } #endif p = (struct msdos_partition *) (data + 0x1be); disksig = le32_to_cpup((__le32 *)(data + 0x1b8)); /* * Look for partitions in two passes: * First find the primary and DOS-type extended partitions. * On the second pass look inside *BSD, Unixware and Solaris partitions. */ state->next = 5; for (slot = 1 ; slot <= 4 ; slot++, p++) { sector_t start = start_sect(p)*sector_size; sector_t size = nr_sects(p)*sector_size; if (!size) continue; if (is_extended_partition(p)) { /* * prevent someone doing mkfs or mkswap on an * extended partition, but leave room for LILO * FIXME: this uses one logical sector for > 512b * sector, although it may not be enough/proper. */ sector_t n = 2; n = min(size, max(sector_size, n)); put_partition(state, slot, start, n); strlcat(state->pp_buf, " <", PAGE_SIZE); parse_extended(state, start, size, disksig); strlcat(state->pp_buf, " >", PAGE_SIZE); continue; } put_partition(state, slot, start, size); set_info(state, slot, disksig); if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[slot].flags = ADDPART_FLAG_RAID; if (p->sys_ind == DM6_PARTITION) strlcat(state->pp_buf, "[DM]", PAGE_SIZE); if (p->sys_ind == EZD_PARTITION) strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); } strlcat(state->pp_buf, "\n", PAGE_SIZE); /* second pass - output for each on a separate line */ p = (struct msdos_partition *) (0x1be + data); for (slot = 1 ; slot <= 4 ; slot++, p++) { unsigned char id = p->sys_ind; int n; if (!nr_sects(p)) continue; for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) ; if (!subtypes[n].parse) continue; subtypes[n].parse(state, start_sect(p) * sector_size, nr_sects(p) * sector_size, slot); } put_dev_sector(sect); return 1; }
5227 4 4 5011 109 4916 4913 4918 3420 33 3404 3407 24 24 5336 3158 475 5338 5341 3368 5332 5338 5331 5331 1792 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/main.c - Where the driver meets power management. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * * The driver model core calls device_pm_add() when a device is registered. * This will initialize the embedded device_pm_info object in the device * and add it to the list of power-controlled devices. sysfs entries for * controlling device power management will also be added. * * A separate list is used for keeping track of power info, because the power * domain dependencies may differ from the ancestral dependencies that the * subsystem list maintains. */ #define pr_fmt(fmt) "PM: " fmt #define dev_fmt pr_fmt #include <linux/device.h> #include <linux/export.h> #include <linux/mutex.h> #include <linux/pm.h> #include <linux/pm_runtime.h> #include <linux/pm-trace.h> #include <linux/pm_wakeirq.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/async.h> #include <linux/suspend.h> #include <trace/events/power.h> #include <linux/cpufreq.h> #include <linux/devfreq.h> #include <linux/timer.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ device_links_read_lock_held()) /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * * Since device_pm_add() may be called with a device lock held, * we must never try to acquire a device lock while holding * dpm_list_mutex. */ LIST_HEAD(dpm_list); static LIST_HEAD(dpm_prepared_list); static LIST_HEAD(dpm_suspended_list); static LIST_HEAD(dpm_late_early_list); static LIST_HEAD(dpm_noirq_list); static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; static int async_error; static const char *pm_verb(int event) { switch (event) { case PM_EVENT_SUSPEND: return "suspend"; case PM_EVENT_RESUME: return "resume"; case PM_EVENT_FREEZE: return "freeze"; case PM_EVENT_QUIESCE: return "quiesce"; case PM_EVENT_HIBERNATE: return "hibernate"; case PM_EVENT_THAW: return "thaw"; case PM_EVENT_RESTORE: return "restore"; case PM_EVENT_RECOVER: return "recover"; default: return "(unknown PM event)"; } } /** * device_pm_sleep_init - Initialize system suspend-related device fields. * @dev: Device object being initialized. */ void device_pm_sleep_init(struct device *dev) { dev->power.is_prepared = false; dev->power.is_suspended = false; dev->power.is_noirq_suspended = false; dev->power.is_late_suspended = false; init_completion(&dev->power.completion); complete_all(&dev->power.completion); dev->power.wakeup = NULL; INIT_LIST_HEAD(&dev->power.entry); } /** * device_pm_lock - Lock the list of active devices used by the PM core. */ void device_pm_lock(void) { mutex_lock(&dpm_list_mtx); } /** * device_pm_unlock - Unlock the list of active devices used by the PM core. */ void device_pm_unlock(void) { mutex_unlock(&dpm_list_mtx); } /** * device_pm_add - Add a device to the PM core's list of active devices. * @dev: Device to add to the list. */ void device_pm_add(struct device *dev) { /* Skip PM setup/initialization. */ if (device_pm_not_required(dev)) return; pr_debug("Adding info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); device_pm_check_callbacks(dev); mutex_lock(&dpm_list_mtx); if (dev->parent && dev->parent->power.is_prepared) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); dev->power.in_dpm_list = true; mutex_unlock(&dpm_list_mtx); } /** * device_pm_remove - Remove a device from the PM core's list of active devices. * @dev: Device to be removed from the list. */ void device_pm_remove(struct device *dev) { if (device_pm_not_required(dev)) return; pr_debug("Removing info for %s:%s\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); dev->power.in_dpm_list = false; mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); pm_runtime_remove(dev); device_pm_check_callbacks(dev); } /** * device_pm_move_before - Move device in the PM core's list of active devices. * @deva: Device to move in dpm_list. * @devb: Device @deva should come before. */ void device_pm_move_before(struct device *deva, struct device *devb) { pr_debug("Moving %s:%s before %s:%s\n", deva->bus ? deva->bus->name : "No Bus", dev_name(deva), devb->bus ? devb->bus->name : "No Bus", dev_name(devb)); /* Delete deva from dpm_list and reinsert before devb. */ list_move_tail(&deva->power.entry, &devb->power.entry); } /** * device_pm_move_after - Move device in the PM core's list of active devices. * @deva: Device to move in dpm_list. * @devb: Device @deva should come after. */ void device_pm_move_after(struct device *deva, struct device *devb) { pr_debug("Moving %s:%s after %s:%s\n", deva->bus ? deva->bus->name : "No Bus", dev_name(deva), devb->bus ? devb->bus->name : "No Bus", dev_name(devb)); /* Delete deva from dpm_list and reinsert after devb. */ list_move(&deva->power.entry, &devb->power.entry); } /** * device_pm_move_last - Move device to end of the PM core's list of devices. * @dev: Device to move in dpm_list. */ void device_pm_move_last(struct device *dev) { pr_debug("Moving %s:%s to end of list\n", dev->bus ? dev->bus->name : "No Bus", dev_name(dev)); list_move_tail(&dev->power.entry, &dpm_list); } static ktime_t initcall_debug_start(struct device *dev, void *cb) { if (!pm_print_times_enabled) return 0; dev_info(dev, "calling %ps @ %i, parent: %s\n", cb, task_pid_nr(current), dev->parent ? dev_name(dev->parent) : "none"); return ktime_get(); } static void initcall_debug_report(struct device *dev, ktime_t calltime, void *cb, int error) { ktime_t rettime; if (!pm_print_times_enabled) return; rettime = ktime_get(); dev_info(dev, "%ps returned %d after %Ld usecs\n", cb, error, (unsigned long long)ktime_us_delta(rettime, calltime)); } /** * dpm_wait - Wait for a PM operation to complete. * @dev: Device to wait for. * @async: If unset, wait only if the device's power.async_suspend flag is set. */ static void dpm_wait(struct device *dev, bool async) { if (!dev) return; if (async || (pm_async_enabled && dev->power.async_suspend)) wait_for_completion(&dev->power.completion); } static int dpm_wait_fn(struct device *dev, void *async_ptr) { dpm_wait(dev, *((bool *)async_ptr)); return 0; } static void dpm_wait_for_children(struct device *dev, bool async) { device_for_each_child(dev, &async, dpm_wait_fn); } static void dpm_wait_for_suppliers(struct device *dev, bool async) { struct device_link *link; int idx; idx = device_links_read_lock(); /* * If the supplier goes away right after we've checked the link to it, * we'll wait for its completion to change the state, but that's fine, * because the only things that will block as a result are the SRCU * callbacks freeing the link objects for the links in the list we're * walking. */ list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->supplier, async); device_links_read_unlock(idx); } static bool dpm_wait_for_superior(struct device *dev, bool async) { struct device *parent; /* * If the device is resumed asynchronously and the parent's callback * deletes both the device and the parent itself, the parent object may * be freed while this function is running, so avoid that by reference * counting the parent once more unless the device has been deleted * already (in which case return right away). */ mutex_lock(&dpm_list_mtx); if (!device_pm_initialized(dev)) { mutex_unlock(&dpm_list_mtx); return false; } parent = get_device(dev->parent); mutex_unlock(&dpm_list_mtx); dpm_wait(parent, async); put_device(parent); dpm_wait_for_suppliers(dev, async); /* * If the parent's callback has deleted the device, attempting to resume * it would be invalid, so avoid doing that then. */ return device_pm_initialized(dev); } static void dpm_wait_for_consumers(struct device *dev, bool async) { struct device_link *link; int idx; idx = device_links_read_lock(); /* * The status of a device link can only be changed from "dormant" by a * probe, but that cannot happen during system suspend/resume. In * theory it can change to "dormant" at that time, but then it is * reasonable to wait for the target device anyway (eg. if it goes * away, it's better to wait for it to go away completely and then * continue instead of trying to continue in parallel with its * unregistration). */ list_for_each_entry_rcu_locked(link, &dev->links.consumers, s_node) if (READ_ONCE(link->status) != DL_STATE_DORMANT) dpm_wait(link->consumer, async); device_links_read_unlock(idx); } static void dpm_wait_for_subordinate(struct device *dev, bool async) { dpm_wait_for_children(dev, async); dpm_wait_for_consumers(dev, async); } /** * pm_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend; case PM_EVENT_RESUME: return ops->resume; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze; case PM_EVENT_HIBERNATE: return ops->poweroff; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw; case PM_EVENT_RESTORE: return ops->restore; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } /** * pm_late_early_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * * Runtime PM is disabled for @dev while this function is being executed. */ static pm_callback_t pm_late_early_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend_late; case PM_EVENT_RESUME: return ops->resume_early; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_late; case PM_EVENT_HIBERNATE: return ops->poweroff_late; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw_early; case PM_EVENT_RESTORE: return ops->restore_early; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } /** * pm_noirq_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state) { switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: return ops->suspend_noirq; case PM_EVENT_RESUME: return ops->resume_noirq; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return ops->freeze_noirq; case PM_EVENT_HIBERNATE: return ops->poweroff_noirq; case PM_EVENT_THAW: case PM_EVENT_RECOVER: return ops->thaw_noirq; case PM_EVENT_RESTORE: return ops->restore_noirq; #endif /* CONFIG_HIBERNATE_CALLBACKS */ } return NULL; } static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) { dev_dbg(dev, "%s%s%s driver flags: %x\n", info, pm_verb(state.event), ((state.event & PM_EVENT_SLEEP) && device_may_wakeup(dev)) ? ", may wakeup" : "", dev->power.driver_flags); } static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, error); } static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, const char *info) { ktime_t calltime; u64 usecs64; int usecs; calltime = ktime_get(); usecs64 = ktime_to_ns(ktime_sub(calltime, starttime)); do_div(usecs64, NSEC_PER_USEC); usecs = usecs64; if (usecs == 0) usecs = 1; pm_pr_dbg("%s%s%s of devices %s after %ld.%03ld msecs\n", info ?: "", info ? " " : "", pm_verb(state.event), error ? "aborted" : "complete", usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); } static int dpm_run_callback(pm_callback_t cb, struct device *dev, pm_message_t state, const char *info) { ktime_t calltime; int error; if (!cb) return 0; calltime = initcall_debug_start(dev, cb); pm_dev_dbg(dev, state, info); trace_device_pm_callback_start(dev, info, state.event); error = cb(dev); trace_device_pm_callback_end(dev, error); suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); return error; } #ifdef CONFIG_DPM_WATCHDOG struct dpm_watchdog { struct device *dev; struct task_struct *tsk; struct timer_list timer; }; #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \ struct dpm_watchdog wd /** * dpm_watchdog_handler - Driver suspend / resume watchdog handler. * @t: The timer that PM watchdog depends on. * * Called when a driver has timed out suspending or resuming. * There's not much we can do here to recover so panic() to * capture a crash-dump in pstore. */ static void dpm_watchdog_handler(struct timer_list *t) { struct dpm_watchdog *wd = from_timer(wd, t, timer); dev_emerg(wd->dev, "**** DPM device timeout ****\n"); show_stack(wd->tsk, NULL, KERN_EMERG); panic("%s %s: unrecoverable failure\n", dev_driver_string(wd->dev), dev_name(wd->dev)); } /** * dpm_watchdog_set - Enable pm watchdog for given device. * @wd: Watchdog. Must be allocated on the stack. * @dev: Device to handle. */ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev) { struct timer_list *timer = &wd->timer; wd->dev = dev; wd->tsk = current; timer_setup_on_stack(timer, dpm_watchdog_handler, 0); /* use same timeout value for both suspend and resume */ timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT; add_timer(timer); } /** * dpm_watchdog_clear - Disable suspend/resume watchdog. * @wd: Watchdog to disable. */ static void dpm_watchdog_clear(struct dpm_watchdog *wd) { struct timer_list *timer = &wd->timer; del_timer_sync(timer); destroy_timer_on_stack(timer); } #else #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) #define dpm_watchdog_set(x, y) #define dpm_watchdog_clear(x) #endif /*------------------------- Resume routines -------------------------*/ /** * dev_pm_skip_resume - System-wide device resume optimization check. * @dev: Target device. * * Return: * - %false if the transition under way is RESTORE. * - Return value of dev_pm_skip_suspend() if the transition under way is THAW. * - The logical negation of %power.must_resume otherwise (that is, when the * transition under way is RESUME). */ bool dev_pm_skip_resume(struct device *dev) { if (pm_transition.event == PM_EVENT_RESTORE) return false; if (pm_transition.event == PM_EVENT_THAW) return dev_pm_skip_suspend(dev); return !dev->power.must_resume; } static bool is_async(struct device *dev) { return dev->power.async_suspend && pm_async_enabled && !pm_trace_is_enabled(); } static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); if (is_async(dev)) { dev->power.async_in_progress = true; get_device(dev); if (async_schedule_dev_nocall(func, dev)) return true; put_device(dev); } /* * Because async_schedule_dev_nocall() above has returned false or it * has not been called at all, func() is not running and it is safe to * update the async_in_progress flag without extra synchronization. */ dev->power.async_in_progress = false; return false; } /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static void device_resume_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; bool skip_resume; int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_noirq_suspended) goto Out; if (!dpm_wait_for_superior(dev, async)) goto Out; skip_resume = dev_pm_skip_resume(dev); /* * If the driver callback is skipped below or by the middle layer * callback and device_resume_early() also skips the driver callback for * this device later, it needs to appear as "suspended" to PM-runtime, * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set * to avoid confusing drivers that don't use it. */ if (skip_resume) pm_runtime_set_suspended(dev); else if (dev_pm_skip_suspend(dev)) pm_runtime_set_active(dev); if (dev->pm_domain) { info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (callback) goto Run; if (skip_resume) goto Skip; if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); Skip: dev->power.is_noirq_suspended = false; Out: complete_all(&dev->power.completion); TRACE_RESUME(error); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); } } static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume_noirq(dev, pm_transition, true); put_device(dev); } static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); async_error = 0; pm_transition = state; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_noirq_list, power.entry) dpm_async_fn(dev, async_resume_noirq); while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); list_move_tail(&dev->power.entry, &dpm_late_early_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume_noirq(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "noirq"); if (async_error) dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } /** * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices. * @state: PM transition of the system being carried out. * * Invoke the "noirq" resume callbacks for all devices in dpm_noirq_list and * allow device drivers' interrupt handlers to be called. */ void dpm_resume_noirq(pm_message_t state) { dpm_noirq_resume_devices(state); resume_device_irqs(); device_wakeup_disarm_wake_irqs(); } /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ static void device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore || dev->power.direct_complete) goto Out; if (!dev->power.is_late_suspended) goto Out; if (!dpm_wait_for_superior(dev, async)) goto Out; if (dev->pm_domain) { info = "early power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "early type "; callback = pm_late_early_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "early class "; callback = pm_late_early_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "early bus "; callback = pm_late_early_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_resume(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "early driver "; callback = pm_late_early_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); Skip: dev->power.is_late_suspended = false; Out: TRACE_RESUME(error); pm_runtime_enable(dev); complete_all(&dev->power.completion); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async early" : " early", error); } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume_early(dev, pm_transition, true); put_device(dev); } /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. */ void dpm_resume_early(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); async_error = 0; pm_transition = state; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_late_early_list, power.entry) dpm_async_fn(dev, async_resume_early); while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); list_move_tail(&dev->power.entry, &dpm_suspended_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume_early(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "early"); if (async_error) dpm_save_failed_step(SUSPEND_RESUME_EARLY); trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } /** * dpm_resume_start - Execute "noirq" and "early" device callbacks. * @state: PM transition of the system being carried out. */ void dpm_resume_start(pm_message_t state) { dpm_resume_noirq(state); dpm_resume_early(state); } EXPORT_SYMBOL_GPL(dpm_resume_start); /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ static void device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->power.syscore) goto Complete; if (dev->power.direct_complete) { /* Match the pm_runtime_disable() in __device_suspend(). */ pm_runtime_enable(dev); goto Complete; } if (!dpm_wait_for_superior(dev, async)) goto Complete; dpm_watchdog_set(&wd, dev); device_lock(dev); /* * This is a fib. But we'll allow new children to be added below * a resumed device, even if the device hasn't been completed yet. */ dev->power.is_prepared = false; if (!dev->power.is_suspended) goto Unlock; if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); goto Driver; } if (dev->type && dev->type->pm) { info = "type "; callback = pm_op(dev->type->pm, state); goto Driver; } if (dev->class && dev->class->pm) { info = "class "; callback = pm_op(dev->class->pm, state); goto Driver; } if (dev->bus) { if (dev->bus->pm) { info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->resume) { info = "legacy bus "; callback = dev->bus->resume; goto End; } } Driver: if (!callback && dev->driver && dev->driver->pm) { info = "driver "; callback = pm_op(dev->driver->pm, state); } End: error = dpm_run_callback(callback, dev, state, info); dev->power.is_suspended = false; Unlock: device_unlock(dev); dpm_watchdog_clear(&wd); Complete: complete_all(&dev->power.completion); TRACE_RESUME(error); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; device_resume(dev, pm_transition, true); put_device(dev); } /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. * * Execute the appropriate "resume" callback for all devices whose status * indicates that they are suspended. */ void dpm_resume(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume"), state.event, true); might_sleep(); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ list_for_each_entry(dev, &dpm_suspended_list, power.entry) dpm_async_fn(dev, async_resume); while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); list_move_tail(&dev->power.entry, &dpm_prepared_list); if (!dev->power.async_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); device_resume(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); } } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, NULL); if (async_error) dpm_save_failed_step(SUSPEND_RESUME); cpufreq_resume(); devfreq_resume(); trace_suspend_resume(TPS("dpm_resume"), state.event, false); } /** * device_complete - Complete a PM transition for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. */ static void device_complete(struct device *dev, pm_message_t state) { void (*callback)(struct device *) = NULL; const char *info = NULL; if (dev->power.syscore) goto out; device_lock(dev); if (dev->pm_domain) { info = "completing power domain "; callback = dev->pm_domain->ops.complete; } else if (dev->type && dev->type->pm) { info = "completing type "; callback = dev->type->pm->complete; } else if (dev->class && dev->class->pm) { info = "completing class "; callback = dev->class->pm->complete; } else if (dev->bus && dev->bus->pm) { info = "completing bus "; callback = dev->bus->pm->complete; } if (!callback && dev->driver && dev->driver->pm) { info = "completing driver "; callback = dev->driver->pm->complete; } if (callback) { pm_dev_dbg(dev, state, info); callback(dev); } device_unlock(dev); out: pm_runtime_put(dev); } /** * dpm_complete - Complete a PM transition for all non-sysdev devices. * @state: PM transition of the system being carried out. * * Execute the ->complete() callbacks for all devices whose PM status is not * DPM_ON (this allows new devices to be registered). */ void dpm_complete(pm_message_t state) { struct list_head list; trace_suspend_resume(TPS("dpm_complete"), state.event, true); might_sleep(); INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); dev->power.is_prepared = false; list_move(&dev->power.entry, &list); mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); device_complete(dev, state); trace_device_pm_callback_end(dev, 0); put_device(dev); mutex_lock(&dpm_list_mtx); } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); /* Allow device probing and trigger re-probing of deferred devices */ device_unblock_probing(); trace_suspend_resume(TPS("dpm_complete"), state.event, false); } /** * dpm_resume_end - Execute "resume" callbacks and complete system transition. * @state: PM transition of the system being carried out. * * Execute "resume" callbacks for all devices and complete the PM transition of * the system. */ void dpm_resume_end(pm_message_t state) { dpm_resume(state); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. * * Return a PM message representing the resume event corresponding to given * sleep state. */ static pm_message_t resume_event(pm_message_t sleep_state) { switch (sleep_state.event) { case PM_EVENT_SUSPEND: return PMSG_RESUME; case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: return PMSG_RECOVER; case PM_EVENT_HIBERNATE: return PMSG_RESTORE; } return PMSG_ON; } static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; if (dev->parent) dev->parent->power.must_resume = true; idx = device_links_read_lock(); list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; device_links_read_unlock(idx); } /** * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * The driver of @dev will not receive interrupts while this function is being * executed. */ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_SUSPEND(0); dpm_wait_for_subordinate(dev, async); if (async_error) goto Complete; if (dev->power.syscore || dev->power.direct_complete) goto Complete; if (dev->pm_domain) { info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "noirq type "; callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "noirq class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "noirq bus "; callback = pm_noirq_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_suspend(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); goto Complete; } Skip: dev->power.is_noirq_suspended = true; /* * Skipping the resume of devices that were in use right before the * system suspend (as indicated by their PM-runtime usage counters) * would be suboptimal. Also resume them if doing that is not allowed * to be skipped. */ if (atomic_read(&dev->power.usage_count) > 1 || !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && dev->power.may_skip_resume)) dev->power.must_resume = true; if (dev->power.must_resume) dpm_superior_set_must_resume(dev); Complete: complete_all(&dev->power.completion); TRACE_SUSPEND(error); return error; } static void async_suspend_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend_noirq(dev, pm_transition, true); put_device(dev); } static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_late_early_list)) { struct device *dev = to_device(dpm_late_early_list.prev); list_move(&dev->power.entry, &dpm_noirq_list); if (dpm_async_fn(dev, async_suspend_noirq)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend_noirq(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); dpm_show_time(starttime, state, error, "noirq"); trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); return error; } /** * dpm_suspend_noirq - Execute "noirq suspend" callbacks for all devices. * @state: PM transition of the system being carried out. * * Prevent device drivers' interrupt handlers from being called and invoke * "noirq" suspend callbacks for all non-sysdev devices. */ int dpm_suspend_noirq(pm_message_t state) { int ret; device_wakeup_arm_wake_irqs(); suspend_device_irqs(); ret = dpm_noirq_suspend_devices(state); if (ret) dpm_resume_noirq(resume_event(state)); return ret; } static void dpm_propagate_wakeup_to_parent(struct device *dev) { struct device *parent = dev->parent; if (!parent) return; spin_lock_irq(&parent->power.lock); if (device_wakeup_path(dev) && !parent->power.ignore_children) parent->power.wakeup_path = true; spin_unlock_irq(&parent->power.lock); } /** * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ static int device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_SUSPEND(0); __pm_runtime_disable(dev, false); dpm_wait_for_subordinate(dev, async); if (async_error) goto Complete; if (pm_wakeup_pending()) { async_error = -EBUSY; goto Complete; } if (dev->power.syscore || dev->power.direct_complete) goto Complete; if (dev->pm_domain) { info = "late power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { info = "late type "; callback = pm_late_early_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { info = "late class "; callback = pm_late_early_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { info = "late bus "; callback = pm_late_early_op(dev->bus->pm, state); } if (callback) goto Run; if (dev_pm_skip_suspend(dev)) goto Skip; if (dev->driver && dev->driver->pm) { info = "late driver "; callback = pm_late_early_op(dev->driver->pm, state); } Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async late" : " late", error); goto Complete; } dpm_propagate_wakeup_to_parent(dev); Skip: dev->power.is_late_suspended = true; Complete: TRACE_SUSPEND(error); complete_all(&dev->power.completion); return error; } static void async_suspend_late(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend_late(dev, pm_transition, true); put_device(dev); } /** * dpm_suspend_late - Execute "late suspend" callbacks for all devices. * @state: PM transition of the system being carried out. */ int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); pm_transition = state; async_error = 0; wake_up_all_idle_cpus(); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_suspended_list)) { struct device *dev = to_device(dpm_suspended_list.prev); list_move(&dev->power.entry, &dpm_late_early_list); if (dpm_async_fn(dev, async_suspend_late)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend_late(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) { dpm_save_failed_step(SUSPEND_SUSPEND_LATE); dpm_resume_early(resume_event(state)); } dpm_show_time(starttime, state, error, "late"); trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false); return error; } /** * dpm_suspend_end - Execute "late" and "noirq" device suspend callbacks. * @state: PM transition of the system being carried out. */ int dpm_suspend_end(pm_message_t state) { ktime_t starttime = ktime_get(); int error; error = dpm_suspend_late(state); if (error) goto out; error = dpm_suspend_noirq(state); if (error) dpm_resume_early(resume_event(state)); out: dpm_show_time(starttime, state, error, "end"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_end); /** * legacy_suspend - Execute a legacy (bus or class) suspend callback for device. * @dev: Device to suspend. * @state: PM transition of the system being carried out. * @cb: Suspend callback to execute. * @info: string description of caller. */ static int legacy_suspend(struct device *dev, pm_message_t state, int (*cb)(struct device *dev, pm_message_t state), const char *info) { int error; ktime_t calltime; calltime = initcall_debug_start(dev, cb); trace_device_pm_callback_start(dev, info, state.event); error = cb(dev, state); trace_device_pm_callback_end(dev, error); suspend_report_result(dev, cb, error); initcall_debug_report(dev, calltime, cb, error); return error; } static void dpm_clear_superiors_direct_complete(struct device *dev) { struct device_link *link; int idx; if (dev->parent) { spin_lock_irq(&dev->parent->power.lock); dev->parent->power.direct_complete = false; spin_unlock_irq(&dev->parent->power.lock); } idx = device_links_read_lock(); list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { spin_lock_irq(&link->supplier->power.lock); link->supplier->power.direct_complete = false; spin_unlock_irq(&link->supplier->power.lock); } device_links_read_unlock(idx); } /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. */ static int device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; int error = 0; DECLARE_DPM_WATCHDOG_ON_STACK(wd); TRACE_DEVICE(dev); TRACE_SUSPEND(0); dpm_wait_for_subordinate(dev, async); if (async_error) { dev->power.direct_complete = false; goto Complete; } /* * Wait for possible runtime PM transitions of the device in progress * to complete and if there's a runtime resume request pending for it, * resume it before proceeding with invoking the system-wide suspend * callbacks for it. * * If the system-wide suspend callbacks below change the configuration * of the device, they must disable runtime PM for it or otherwise * ensure that its runtime-resume callbacks will not be confused by that * change in case they are invoked going forward. */ pm_runtime_barrier(dev); if (pm_wakeup_pending()) { dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } if (dev->power.syscore) goto Complete; /* Avoid direct_complete to let wakeup_path propagate. */ if (device_may_wakeup(dev) || device_wakeup_path(dev)) dev->power.direct_complete = false; if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); goto Complete; } pm_runtime_enable(dev); } dev->power.direct_complete = false; } dev->power.may_skip_resume = true; dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); goto Run; } if (dev->type && dev->type->pm) { info = "type "; callback = pm_op(dev->type->pm, state); goto Run; } if (dev->class && dev->class->pm) { info = "class "; callback = pm_op(dev->class->pm, state); goto Run; } if (dev->bus) { if (dev->bus->pm) { info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy bus "); error = legacy_suspend(dev, state, dev->bus->suspend, "legacy bus "); goto End; } } Run: if (!callback && dev->driver && dev->driver->pm) { info = "driver "; callback = pm_op(dev->driver->pm, state); } error = dpm_run_callback(callback, dev, state, info); End: if (!error) { dev->power.is_suspended = true; if (device_may_wakeup(dev)) dev->power.wakeup_path = true; dpm_propagate_wakeup_to_parent(dev); dpm_clear_superiors_direct_complete(dev); } device_unlock(dev); dpm_watchdog_clear(&wd); Complete: if (error) { async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } complete_all(&dev->power.completion); TRACE_SUSPEND(error); return error; } static void async_suspend(void *data, async_cookie_t cookie) { struct device *dev = data; device_suspend(dev, pm_transition, true); put_device(dev); } /** * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. */ int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend"), state.event, true); might_sleep(); devfreq_suspend(); cpufreq_suspend(); pm_transition = state; async_error = 0; mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { struct device *dev = to_device(dpm_prepared_list.prev); list_move(&dev->power.entry, &dpm_suspended_list); if (dpm_async_fn(dev, async_suspend)) continue; get_device(dev); mutex_unlock(&dpm_list_mtx); error = device_suspend(dev, state, false); put_device(dev); mutex_lock(&dpm_list_mtx); if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); if (!error) error = async_error; if (error) dpm_save_failed_step(SUSPEND_SUSPEND); dpm_show_time(starttime, state, error, NULL); trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; } /** * device_prepare - Prepare a device for system power transition. * @dev: Device to handle. * @state: PM transition of the system being carried out. * * Execute the ->prepare() callback(s) for given device. No new children of the * device may be registered after this function has returned. */ static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; int ret = 0; /* * If a device's parent goes into runtime suspend at the wrong time, * it won't be possible to resume the device. To prevent this we * block runtime suspend here, during the prepare phase, and allow * it again during the complete phase. */ pm_runtime_get_noresume(dev); if (dev->power.syscore) return 0; device_lock(dev); dev->power.wakeup_path = false; if (dev->power.no_pm_callbacks) goto unlock; if (dev->pm_domain) callback = dev->pm_domain->ops.prepare; else if (dev->type && dev->type->pm) callback = dev->type->pm->prepare; else if (dev->class && dev->class->pm) callback = dev->class->pm->prepare; else if (dev->bus && dev->bus->pm) callback = dev->bus->pm->prepare; if (!callback && dev->driver && dev->driver->pm) callback = dev->driver->pm->prepare; if (callback) ret = callback(dev); unlock: device_unlock(dev); if (ret < 0) { suspend_report_result(dev, callback, ret); pm_runtime_put(dev); return ret; } /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is * runtime-suspended, you can leave it in that state provided that you * will do the same thing with all of its descendants". This only * applies to suspend transitions, however. */ spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); spin_unlock_irq(&dev->power.lock); return 0; } /** * dpm_prepare - Prepare all non-sysdev devices for a system PM transition. * @state: PM transition of the system being carried out. * * Execute the ->prepare() callback(s) for all devices. */ int dpm_prepare(pm_message_t state) { int error = 0; trace_suspend_resume(TPS("dpm_prepare"), state.event, true); might_sleep(); /* * Give a chance for the known devices to complete their probes, before * disable probing of devices. This sync point is important at least * at boot time + hibernation restore. */ wait_for_device_probe(); /* * It is unsafe if probing of devices will happen during suspend or * hibernation and system behavior will be unpredictable in this case. * So, let's prohibit device's probing here and defer their probes * instead. The normal behavior will be restored in dpm_complete(). */ device_block_probing(); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); error = device_prepare(dev, state); trace_device_pm_callback_end(dev, error); mutex_lock(&dpm_list_mtx); if (!error) { dev->power.is_prepared = true; if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); } else if (error == -EAGAIN) { error = 0; } else { dev_info(dev, "not prepared for power transition: code %d\n", error); } mutex_unlock(&dpm_list_mtx); put_device(dev); mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); trace_suspend_resume(TPS("dpm_prepare"), state.event, false); return error; } /** * dpm_suspend_start - Prepare devices for PM transition and suspend them. * @state: PM transition of the system being carried out. * * Prepare all non-sysdev devices for system PM transition and execute "suspend" * callbacks for them. */ int dpm_suspend_start(pm_message_t state) { ktime_t starttime = ktime_get(); int error; error = dpm_prepare(state); if (error) dpm_save_failed_step(SUSPEND_PREPARE); else error = dpm_suspend(state); dpm_show_time(starttime, state, error, "start"); return error; } EXPORT_SYMBOL_GPL(dpm_suspend_start); void __suspend_report_result(const char *function, struct device *dev, void *fn, int ret) { if (ret) dev_err(dev, "%s(): %ps returns %d\n", function, fn, ret); } EXPORT_SYMBOL_GPL(__suspend_report_result); /** * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete. * @subordinate: Device that needs to wait for @dev. * @dev: Device to wait for. */ int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { dpm_wait(dev, subordinate->power.async_suspend); return async_error; } EXPORT_SYMBOL_GPL(device_pm_wait_for_dev); /** * dpm_for_each_dev - device iterator. * @data: data for the callback. * @fn: function to be called for each device. * * Iterate over devices in dpm_list, and call @fn for each device, * passing it @data. */ void dpm_for_each_dev(void *data, void (*fn)(struct device *, void *)) { struct device *dev; if (!fn) return; device_pm_lock(); list_for_each_entry(dev, &dpm_list, power.entry) fn(dev, data); device_pm_unlock(); } EXPORT_SYMBOL_GPL(dpm_for_each_dev); static bool pm_ops_is_empty(const struct dev_pm_ops *ops) { if (!ops) return true; return !ops->prepare && !ops->suspend && !ops->suspend_late && !ops->suspend_noirq && !ops->resume_noirq && !ops->resume_early && !ops->resume && !ops->complete; } void device_pm_check_callbacks(struct device *dev) { unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); dev->power.no_pm_callbacks = (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && !dev->bus->suspend && !dev->bus->resume)) && (!dev->class || pm_ops_is_empty(dev->class->pm)) && (!dev->type || pm_ops_is_empty(dev->type->pm)) && (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && !dev->driver->suspend && !dev->driver->resume)); spin_unlock_irqrestore(&dev->power.lock, flags); } bool dev_pm_skip_suspend(struct device *dev) { return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && pm_runtime_status_suspended(dev); }
3584 1839 86 86 9 9 30 4 26 1387 118 61 1831 12 92 92 41 42 49 1833 632 634 1248 238 2750 1835 73 42 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 // SPDX-License-Identifier: GPL-2.0-only /* tnum: tracked (or tristate) numbers * * A tnum tracks knowledge about the bits of a value. Each bit can be either * known (0 or 1), or unknown (x). Arithmetic operations on tnums will * propagate the unknown bits such that the tnum result represents all the * possible results for possible values of the operands. */ #include <linux/kernel.h> #include <linux/tnum.h> #define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} /* A completely unknown value */ const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; struct tnum tnum_const(u64 value) { return TNUM(value, 0); } struct tnum tnum_range(u64 min, u64 max) { u64 chi = min ^ max, delta; u8 bits = fls64(chi); /* special case, needed because 1ULL << 64 is undefined */ if (bits > 63) return tnum_unknown; /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7. * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return * constant min (since min == max). */ delta = (1ULL << bits) - 1; return TNUM(min & ~delta, delta); } struct tnum tnum_lshift(struct tnum a, u8 shift) { return TNUM(a.value << shift, a.mask << shift); } struct tnum tnum_rshift(struct tnum a, u8 shift) { return TNUM(a.value >> shift, a.mask >> shift); } struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness) { /* if a.value is negative, arithmetic shifting by minimum shift * will have larger negative offset compared to more shifting. * If a.value is nonnegative, arithmetic shifting by minimum shift * will have larger positive offset compare to more shifting. */ if (insn_bitness == 32) return TNUM((u32)(((s32)a.value) >> min_shift), (u32)(((s32)a.mask) >> min_shift)); else return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); } struct tnum tnum_add(struct tnum a, struct tnum b) { u64 sm, sv, sigma, chi, mu; sm = a.mask + b.mask; sv = a.value + b.value; sigma = sm + sv; chi = sigma ^ sv; mu = chi | a.mask | b.mask; return TNUM(sv & ~mu, mu); } struct tnum tnum_sub(struct tnum a, struct tnum b) { u64 dv, alpha, beta, chi, mu; dv = a.value - b.value; alpha = dv + a.mask; beta = dv - b.mask; chi = alpha ^ beta; mu = chi | a.mask | b.mask; return TNUM(dv & ~mu, mu); } struct tnum tnum_and(struct tnum a, struct tnum b) { u64 alpha, beta, v; alpha = a.value | a.mask; beta = b.value | b.mask; v = a.value & b.value; return TNUM(v, alpha & beta & ~v); } struct tnum tnum_or(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask | b.mask; return TNUM(v, mu & ~v); } struct tnum tnum_xor(struct tnum a, struct tnum b) { u64 v, mu; v = a.value ^ b.value; mu = a.mask | b.mask; return TNUM(v & ~mu, mu); } /* Generate partial products by multiplying each bit in the multiplier (tnum a) * with the multiplicand (tnum b), and add the partial products after * appropriately bit-shifting them. Instead of directly performing tnum addition * on the generated partial products, equivalenty, decompose each partial * product into two tnums, consisting of the value-sum (acc_v) and the * mask-sum (acc_m) and then perform tnum addition on them. The following paper * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398. */ struct tnum tnum_mul(struct tnum a, struct tnum b) { u64 acc_v = a.value * b.value; struct tnum acc_m = TNUM(0, 0); while (a.value || a.mask) { /* LSB of tnum a is a certain 1 */ if (a.value & 1) acc_m = tnum_add(acc_m, TNUM(0, b.mask)); /* LSB of tnum a is uncertain */ else if (a.mask & 1) acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask)); /* Note: no case for LSB is certain 0 */ a = tnum_rshift(a, 1); b = tnum_lshift(b, 1); } return tnum_add(TNUM(acc_v, 0), acc_m); } /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has * a 'known 0' - this will return a 'known 1' for that bit. */ struct tnum tnum_intersect(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask & b.mask; return TNUM(v & ~mu, mu); } struct tnum tnum_cast(struct tnum a, u8 size) { a.value &= (1ULL << (size * 8)) - 1; a.mask &= (1ULL << (size * 8)) - 1; return a; } bool tnum_is_aligned(struct tnum a, u64 size) { if (!size) return true; return !((a.value | a.mask) & (size - 1)); } bool tnum_in(struct tnum a, struct tnum b) { if (b.mask & ~a.mask) return false; b.value &= ~a.mask; return a.value == b.value; } int tnum_sbin(char *str, size_t size, struct tnum a) { size_t n; for (n = 64; n; n--) { if (n < size) { if (a.mask & 1) str[n - 1] = 'x'; else if (a.value & 1) str[n - 1] = '1'; else str[n - 1] = '0'; } a.mask >>= 1; a.value >>= 1; } str[min(size - 1, (size_t)64)] = 0; return 64; } struct tnum tnum_subreg(struct tnum a) { return tnum_cast(a, 4); } struct tnum tnum_clear_subreg(struct tnum a) { return tnum_lshift(tnum_rshift(a, 32), 32); } struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg) { return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg)); } struct tnum tnum_const_subreg(struct tnum a, u32 value) { return tnum_with_subreg(a, tnum_const(value)); }
1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 // SPDX-License-Identifier: GPL-2.0-only /* * inode.c - securityfs * * Copyright (C) 2005 Greg Kroah-Hartman <gregkh@suse.de> * * Based on fs/debugfs/inode.c which had the following copyright notice: * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. */ /* #define DEBUG */ #include <linux/sysfs.h> #include <linux/kobject.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/lsm_hooks.h> #include <linux/magic.h> static struct vfsmount *mount; static int mount_count; static void securityfs_free_inode(struct inode *inode) { if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); free_inode_nonrcu(inode); } static const struct super_operations securityfs_super_operations = { .statfs = simple_statfs, .free_inode = securityfs_free_inode, }; static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr files[] = {{""}}; int error; error = simple_fill_super(sb, SECURITYFS_MAGIC, files); if (error) return error; sb->s_op = &securityfs_super_operations; return 0; } static int securityfs_get_tree(struct fs_context *fc) { return get_tree_single(fc, securityfs_fill_super); } static const struct fs_context_operations securityfs_context_ops = { .get_tree = securityfs_get_tree, }; static int securityfs_init_fs_context(struct fs_context *fc) { fc->ops = &securityfs_context_ops; return 0; } static struct file_system_type fs_type = { .owner = THIS_MODULE, .name = "securityfs", .init_fs_context = securityfs_init_fs_context, .kill_sb = kill_litter_super, }; /** * securityfs_create_dentry - create a dentry in the securityfs filesystem * * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the securityfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * @iops: a point to a struct of inode_operations that should be used for * this file/dir * * This is the basic "create a file/dir/symlink" function for * securityfs. It allows for a wide range of flexibility in creating * a file, or a directory (if you want to create a directory, the * securityfs_create_dir() function is recommended to be used * instead). * * This function returns a pointer to a dentry if it succeeds. This * pointer must be passed to the securityfs_remove() function when the * file is to be removed (no automatic cleanup happens if your module * is unloaded, you are responsible here). If an error occurs, the * function will return the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops, const struct inode_operations *iops) { struct dentry *dentry; struct inode *dir, *inode; int error; if (!(mode & S_IFMT)) mode = (mode & S_IALLUGO) | S_IFREG; pr_debug("securityfs: creating file '%s'\n",name); error = simple_pin_fs(&fs_type, &mount, &mount_count); if (error) return ERR_PTR(error); if (!parent) parent = mount->mnt_root; dir = d_inode(parent); inode_lock(dir); dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) goto out; if (d_really_is_positive(dentry)) { error = -EEXIST; goto out1; } inode = new_inode(dir->i_sb); if (!inode) { error = -ENOMEM; goto out1; } inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); inode->i_private = data; if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inc_nlink(inode); inc_nlink(dir); } else if (S_ISLNK(mode)) { inode->i_op = iops ? iops : &simple_symlink_inode_operations; inode->i_link = data; } else { inode->i_fop = fops; } d_instantiate(dentry, inode); dget(dentry); inode_unlock(dir); return dentry; out1: dput(dentry); dentry = ERR_PTR(error); out: inode_unlock(dir); simple_release_fs(&mount, &mount_count); return dentry; } /** * securityfs_create_file - create a file in the securityfs filesystem * * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the securityfs filesystem. * @data: a pointer to something that the caller will want to get to later * on. The inode.i_private pointer will point to this value on * the open() call. * @fops: a pointer to a struct file_operations that should be used for * this file. * * This function creates a file in securityfs with the given @name. * * This function returns a pointer to a dentry if it succeeds. This * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here). If an error occurs, the function will return * the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ struct dentry *securityfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { return securityfs_create_dentry(name, mode, parent, data, fops, NULL); } EXPORT_SYMBOL_GPL(securityfs_create_file); /** * securityfs_create_dir - create a directory in the securityfs filesystem * * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * directory will be created in the root of the securityfs filesystem. * * This function creates a directory in securityfs with the given @name. * * This function returns a pointer to a dentry if it succeeds. This * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here). If an error occurs, the function will return * the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ struct dentry *securityfs_create_dir(const char *name, struct dentry *parent) { return securityfs_create_file(name, S_IFDIR | 0755, parent, NULL, NULL); } EXPORT_SYMBOL_GPL(securityfs_create_dir); /** * securityfs_create_symlink - create a symlink in the securityfs filesystem * * @name: a pointer to a string containing the name of the symlink to * create. * @parent: a pointer to the parent dentry for the symlink. This should be a * directory dentry if set. If this parameter is %NULL, then the * directory will be created in the root of the securityfs filesystem. * @target: a pointer to a string containing the name of the symlink's target. * If this parameter is %NULL, then the @iops parameter needs to be * setup to handle .readlink and .get_link inode_operations. * @iops: a pointer to the struct inode_operations to use for the symlink. If * this parameter is %NULL, then the default simple_symlink_inode * operations will be used. * * This function creates a symlink in securityfs with the given @name. * * This function returns a pointer to a dentry if it succeeds. This * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here). If an error occurs, the function will return * the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. */ struct dentry *securityfs_create_symlink(const char *name, struct dentry *parent, const char *target, const struct inode_operations *iops) { struct dentry *dent; char *link = NULL; if (target) { link = kstrdup(target, GFP_KERNEL); if (!link) return ERR_PTR(-ENOMEM); } dent = securityfs_create_dentry(name, S_IFLNK | 0444, parent, link, NULL, iops); if (IS_ERR(dent)) kfree(link); return dent; } EXPORT_SYMBOL_GPL(securityfs_create_symlink); /** * securityfs_remove - removes a file or directory from the securityfs filesystem * * @dentry: a pointer to a the dentry of the file or directory to be removed. * * This function removes a file or directory in securityfs that was previously * created with a call to another securityfs function (like * securityfs_create_file() or variants thereof.) * * This function is required to be called in order for the file to be * removed. No automatic cleanup of files will happen when a module is * removed; you are responsible here. */ void securityfs_remove(struct dentry *dentry) { struct inode *dir; if (!dentry || IS_ERR(dentry)) return; dir = d_inode(dentry->d_parent); inode_lock(dir); if (simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(dir, dentry); else simple_unlink(dir, dentry); dput(dentry); } inode_unlock(dir); simple_release_fs(&mount, &mount_count); } EXPORT_SYMBOL_GPL(securityfs_remove); #ifdef CONFIG_SECURITY static struct dentry *lsm_dentry; static ssize_t lsm_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { return simple_read_from_buffer(buf, count, ppos, lsm_names, strlen(lsm_names)); } static const struct file_operations lsm_ops = { .read = lsm_read, .llseek = generic_file_llseek, }; #endif static int __init securityfs_init(void) { int retval; retval = sysfs_create_mount_point(kernel_kobj, "security"); if (retval) return retval; retval = register_filesystem(&fs_type); if (retval) { sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } #ifdef CONFIG_SECURITY lsm_dentry = securityfs_create_file("lsm", 0444, NULL, NULL, &lsm_ops); #endif return 0; } core_initcall(securityfs_init);
2 30 696 220 4546 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _IPV6_H #define _IPV6_H #include <uapi/linux/ipv6.h> #include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) /* * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { /* RX & TX fastpath fields. */ __cacheline_group_begin(ipv6_devconf_read_txrx); __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; __s32 forwarding; __s32 disable_policy; __s32 proxy_ndp; __cacheline_group_end(ipv6_devconf_read_txrx); __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; __s32 mldv2_unsolicited_report_interval; __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; __s32 accept_ra_min_lft; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; __s32 use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; __s32 accept_untracked_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; #ifdef CONFIG_IPV6_SEG6_HMAC __s32 seg6_require_hmac; #endif __u32 enhanced_dad; __u32 addr_gen_mode; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; struct ipv6_params { __s32 disable_ipv6; __s32 autoconf; }; extern struct ipv6_params ipv6_defaults; #include <linux/tcp.h> #include <linux/udp.h> #include <net/inet_sock.h> static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_network_header(skb); } static inline struct ipv6hdr *inner_ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_inner_network_header(skb); } static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_transport_header(skb); } static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) { return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) - skb_network_header_len(skb); } /* This structure contains results of exthdrs parsing as offsets from skb->nh. */ struct inet6_skb_parm { int iif; __be16 ra; __u16 dst0; __u16 srcrt; __u16 dst1; __u16 lastopt; __u16 nhoff; __u16 flags; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif __u16 frag_max_size; __u16 srhoff; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 #define IP6SKB_MULTIPATH 1024 }; #if defined(CONFIG_NET_L3_MASTER_DEV) static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } #endif #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) #define IP6CBMTU(skb) ((struct ip6_mtuinfo *)((skb)->cb)) static inline int inet6_iif(const struct sk_buff *skb) { bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } static inline bool inet6_is_jumbogram(const struct sk_buff *skb) { return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); } /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline int inet6_sdif(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return IP6CB(skb)->iif; #endif return 0; } struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; }; /* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES const struct in6_addr *saddr_cache; #endif __be32 flow_label; __u32 frag_size; s16 hop_limit; u8 mcast_hops; int ucast_oif; int mcast_oif; /* pktoption flags */ union { struct { __u16 srcrt:1, osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, rxohlim:1, hopopts:1, ohopopts:1, dstopts:1, odstopts:1, rxflow:1, rxtclass:1, rxpmtu:1, rxorigdstaddr:1, recvfragsize:1; /* 1 bits hole */ } bits; __u16 all; } rxopt; /* sockopt flags */ __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; __u32 dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; }; /* We currently use available bits from inet_sk(sk)->inet_flags, * this could change in the future. */ #define inet6_test_bit(nr, sk) \ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) #define inet6_set_bit(nr, sk) \ set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) #define inet6_clear_bit(nr, sk) \ clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) #define inet6_assign_bit(nr, sk, val) \ assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ struct inet_sock inet; __u32 checksum; /* perform checksum */ __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; struct ipv6_pinfo inet6; }; extern int inet6_sk_rebuild_header(struct sock *sk); struct tcp6_timewait_sock { struct tcp_timewait_sock tcp6tw_tcp; }; #if IS_ENABLED(CONFIG_IPV6) bool ipv6_mod_enabled(void); static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } #define raw6_sk(ptr) container_of_const(ptr, struct raw6_sock, inet.sk) #define ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ inet6_sk(sk)->rxopt.bits.rxinfo) static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { if (sk->sk_family == AF_INET6) return &sk->sk_v6_rcv_saddr; return NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) { /* ipv6only field is at same position for timewait and other sockets */ return ipv6_only_sock(sk); } #else #define ipv6_only_sock(sk) 0 #define ipv6_sk_rxinfo(sk) 0 static inline bool ipv6_mod_enabled(void) { return false; } static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { return NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } #define inet6_rcv_saddr(__sk) NULL #define inet_v6_ipv6only(__sk) 0 #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _IPV6_H */
131 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_SYNPROXY_SHARED_H #define _NF_SYNPROXY_SHARED_H #include <linux/module.h> #include <linux/skbuff.h> #include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/tcp.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; unsigned int cookie_valid; unsigned int cookie_retrans; unsigned int conn_reopened; }; struct synproxy_net { struct nf_conn *tmpl; struct synproxy_stats __percpu *stats; unsigned int hook_ref4; unsigned int hook_ref6; }; extern unsigned int synproxy_net_id; static inline struct synproxy_net *synproxy_pernet(struct net *net) { return net_generic(net, synproxy_net_id); } struct synproxy_options { u8 options; u8 wscale; u16 mss_option; u16 mss_encode; u32 tsval; u32 tsecr; }; struct nf_synproxy_info; bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts); void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts); void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts); bool synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq); struct nf_hook_state; unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs); int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net); void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net); #if IS_ENABLED(CONFIG_IPV6) void synproxy_send_client_synack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts); bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq); unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs); int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net); void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net); #else static inline int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { return 0; } static inline void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) {}; #endif /* CONFIG_IPV6 */ #endif /* _NF_SYNPROXY_SHARED_H */
8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* ipv6header match - matches IPv6 packets based on whether they contain certain headers */ /* Original idea: Brad Chapman * Rewritten by: Andras Kis-Szabo <kisza@sch.bme.hu> */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; int len; u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /* type of the 1st exthdr */ nexthdr = ipv6_hdr(skb)->nexthdr; /* pointer to the 1st exthdr */ ptr = sizeof(struct ipv6hdr); /* available length */ len = skb->len - ptr; temp = 0; while (nf_ip6_ext_hdr(nexthdr)) { const struct ipv6_opt_hdr *hp; struct ipv6_opt_hdr _hdr; int hdrlen; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } /* Is there enough space for the next ext header? */ if (len < (int)sizeof(struct ipv6_opt_hdr)) return false; /* ESP -> evaluate */ if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; break; } hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (!hp) { par->hotdrop = true; return false; } /* Calculate the header length */ if (nexthdr == NEXTHDR_FRAGMENT) hdrlen = 8; else if (nexthdr == NEXTHDR_AUTH) hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); /* set the flag */ switch (nexthdr) { case NEXTHDR_HOP: temp |= MASK_HOPOPTS; break; case NEXTHDR_ROUTING: temp |= MASK_ROUTING; break; case NEXTHDR_FRAGMENT: temp |= MASK_FRAGMENT; break; case NEXTHDR_AUTH: temp |= MASK_AH; break; case NEXTHDR_DEST: temp |= MASK_DSTOPTS; break; default: return false; } nexthdr = hp->nexthdr; len -= hdrlen; ptr += hdrlen; if (ptr > skb->len) break; } if (nexthdr != NEXTHDR_NONE && nexthdr != NEXTHDR_ESP) temp |= MASK_PROTO; if (info->modeflag) return !((temp ^ info->matchflags ^ info->invflags) & info->matchflags); else { if (info->invflags) return temp != info->matchflags; else return temp == info->matchflags; } } static int ipv6header_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ipv6header_info *info = par->matchinfo; /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && info->invflags != 0xFF) return -EINVAL; return 0; } static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", .family = NFPROTO_IPV6, .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = ipv6header_mt6_check, .destroy = NULL, .me = THIS_MODULE, }; static int __init ipv6header_mt6_init(void) { return xt_register_match(&ipv6header_mt6_reg); } static void __exit ipv6header_mt6_exit(void) { xt_unregister_match(&ipv6header_mt6_reg); } module_init(ipv6header_mt6_init); module_exit(ipv6header_mt6_exit);
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/module.h> #include <linux/of_mdio.h> #include <linux/phy.h> #include <linux/usb.h> #define USB_MARVELL_VID 0x1286 static const struct usb_device_id mvusb_mdio_table[] = { { USB_DEVICE(USB_MARVELL_VID, 0x1fa4) }, {} }; MODULE_DEVICE_TABLE(usb, mvusb_mdio_table); enum { MVUSB_CMD_PREAMBLE0, MVUSB_CMD_PREAMBLE1, MVUSB_CMD_ADDR, MVUSB_CMD_VAL, }; struct mvusb_mdio { struct usb_device *udev; struct mii_bus *mdio; __le16 buf[4]; }; static int mvusb_mdio_read(struct mii_bus *mdio, int dev, int reg) { struct mvusb_mdio *mvusb = mdio->priv; int err, alen; mvusb->buf[MVUSB_CMD_ADDR] = cpu_to_le16(0xa400 | (dev << 5) | reg); err = usb_bulk_msg(mvusb->udev, usb_sndbulkpipe(mvusb->udev, 2), mvusb->buf, 6, &alen, 100); if (err) return err; err = usb_bulk_msg(mvusb->udev, usb_rcvbulkpipe(mvusb->udev, 6), &mvusb->buf[MVUSB_CMD_VAL], 2, &alen, 100); if (err) return err; return le16_to_cpu(mvusb->buf[MVUSB_CMD_VAL]); } static int mvusb_mdio_write(struct mii_bus *mdio, int dev, int reg, u16 val) { struct mvusb_mdio *mvusb = mdio->priv; int alen; mvusb->buf[MVUSB_CMD_ADDR] = cpu_to_le16(0x8000 | (dev << 5) | reg); mvusb->buf[MVUSB_CMD_VAL] = cpu_to_le16(val); return usb_bulk_msg(mvusb->udev, usb_sndbulkpipe(mvusb->udev, 2), mvusb->buf, 8, &alen, 100); } static int mvusb_mdio_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *dev = &interface->dev; struct mvusb_mdio *mvusb; struct mii_bus *mdio; int ret; mdio = devm_mdiobus_alloc_size(dev, sizeof(*mvusb)); if (!mdio) return -ENOMEM; mvusb = mdio->priv; mvusb->mdio = mdio; mvusb->udev = usb_get_dev(interface_to_usbdev(interface)); /* Reversed from USB PCAPs, no idea what these mean. */ mvusb->buf[MVUSB_CMD_PREAMBLE0] = cpu_to_le16(0xe800); mvusb->buf[MVUSB_CMD_PREAMBLE1] = cpu_to_le16(0x0001); snprintf(mdio->id, MII_BUS_ID_SIZE, "mvusb-%s", dev_name(dev)); mdio->name = mdio->id; mdio->parent = dev; mdio->read = mvusb_mdio_read; mdio->write = mvusb_mdio_write; usb_set_intfdata(interface, mvusb); ret = of_mdiobus_register(mdio, dev->of_node); if (ret) goto put_dev; return 0; put_dev: usb_put_dev(mvusb->udev); return ret; } static void mvusb_mdio_disconnect(struct usb_interface *interface) { struct mvusb_mdio *mvusb = usb_get_intfdata(interface); struct usb_device *udev = mvusb->udev; mdiobus_unregister(mvusb->mdio); usb_set_intfdata(interface, NULL); usb_put_dev(udev); } static struct usb_driver mvusb_mdio_driver = { .name = "mvusb_mdio", .id_table = mvusb_mdio_table, .probe = mvusb_mdio_probe, .disconnect = mvusb_mdio_disconnect, }; module_usb_driver(mvusb_mdio_driver); MODULE_AUTHOR("Tobias Waldekranz <tobias@waldekranz.com>"); MODULE_DESCRIPTION("Marvell USB MDIO Adapter"); MODULE_LICENSE("GPL");
4 252 252 252 249 3 3 3 247 252 269 235 11 46 248 248 6 244 251 252 252 4 248 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 // SPDX-License-Identifier: GPL-2.0-only /* * Pluggable TCP upper layer protocol support. * * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * */ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_ulp_list_lock); static LIST_HEAD(tcp_ulp_list); /* Simple linear search, don't expect many entries! */ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) { struct tcp_ulp_ops *e; list_for_each_entry_rcu(e, &tcp_ulp_list, list, lockdep_is_held(&tcp_ulp_list_lock)) { if (strcmp(e->name, name) == 0) return e; } return NULL; } static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) { const struct tcp_ulp_ops *ulp = NULL; rcu_read_lock(); ulp = tcp_ulp_find(name); #ifdef CONFIG_MODULES if (!ulp && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); request_module("tcp-ulp-%s", name); rcu_read_lock(); ulp = tcp_ulp_find(name); } #endif if (!ulp || !try_module_get(ulp->owner)) ulp = NULL; rcu_read_unlock(); return ulp; } /* Attach new upper layer protocol to the list * of available protocols. */ int tcp_register_ulp(struct tcp_ulp_ops *ulp) { int ret = 0; spin_lock(&tcp_ulp_list_lock); if (tcp_ulp_find(ulp->name)) ret = -EEXIST; else list_add_tail_rcu(&ulp->list, &tcp_ulp_list); spin_unlock(&tcp_ulp_list_lock); return ret; } EXPORT_SYMBOL_GPL(tcp_register_ulp); void tcp_unregister_ulp(struct tcp_ulp_ops *ulp) { spin_lock(&tcp_ulp_list_lock); list_del_rcu(&ulp->list); spin_unlock(&tcp_ulp_list_lock); synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_ulp); /* Build string with list of available upper layer protocl values */ void tcp_get_available_ulp(char *buf, size_t maxlen) { struct tcp_ulp_ops *ulp_ops; size_t offs = 0; *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ulp_ops, &tcp_ulp_list, list) { offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ulp_ops->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } void tcp_update_ulp(struct sock *sk, struct proto *proto, void (*write_space)(struct sock *sk)) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ulp_ops->update) icsk->icsk_ulp_ops->update(sk, proto, write_space); } void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); /* No sock_owned_by_me() check here as at the time the * stack calls this function, the socket is dead and * about to be destroyed. */ if (!icsk->icsk_ulp_ops) return; if (icsk->icsk_ulp_ops->release) icsk->icsk_ulp_ops->release(sk); module_put(icsk->icsk_ulp_ops->owner); icsk->icsk_ulp_ops = NULL; } static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) { struct inet_connection_sock *icsk = inet_csk(sk); int err; err = -EEXIST; if (icsk->icsk_ulp_ops) goto out_err; if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); err = -ENOTCONN; if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) goto out_err; err = ulp_ops->init(sk); if (err) goto out_err; icsk->icsk_ulp_ops = ulp_ops; return 0; out_err: module_put(ulp_ops->owner); return err; } int tcp_set_ulp(struct sock *sk, const char *name) { const struct tcp_ulp_ops *ulp_ops; sock_owned_by_me(sk); ulp_ops = __tcp_ulp_find_autoload(name); if (!ulp_ops) return -ENOENT; return __tcp_set_ulp(sk, ulp_ops); }
198 400 399 400 400 3 375 279 396 9 305 26 151 11 62 176 331 279 2 29 9 40 19 396 251 330 394 304 26 151 11 62 176 9 331 279 2 29 9 40 20 395 395 394 395 395 395 395 395 394 395 395 395 249 318 318 406 354 220 395 297 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_trace.h" #include "xfs_trans_priv.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_error.h" #include "xfs_rtbitmap.h" #include <linux/iversion.h> struct kmem_cache *xfs_ili_cache; /* inode log item */ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_inode_log_item, ili_item); } static uint64_t xfs_inode_item_sort( struct xfs_log_item *lip) { return INODE_ITEM(lip)->ili_inode->i_ino; } #ifdef DEBUG_EXPENSIVE static void xfs_inode_item_precommit_check( struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; struct xfs_dinode *dip; xfs_failaddr_t fa; dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS); if (!dip) { ASSERT(dip != NULL); return; } xfs_inode_to_disk(ip, dip, 0); xfs_dinode_calc_crc(mp, dip); fa = xfs_dinode_verify(mp, ip->i_ino, dip); if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), fa); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ASSERT(fa == NULL); } kfree(dip); } #else # define xfs_inode_item_precommit_check(ip) ((void)0) #endif /* * Prior to finally logging the inode, we have to ensure that all the * per-modification inode state changes are applied. This includes VFS inode * state updates, format conversions, verifier state synchronisation and * ensuring the inode buffer remains in memory whilst the inode is dirty. * * We have to be careful when we grab the inode cluster buffer due to lock * ordering constraints. The unlinked inode modifications (xfs_iunlink_item) * require AGI -> inode cluster buffer lock order. The inode cluster buffer is * not locked until ->precommit, so it happens after everything else has been * modified. * * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because * it can be called on a inode (e.g. via bumplink/droplink) before we take the * AGF lock modifying directory blocks. * * Rather than force a complete rework of all the transactions to call * xfs_trans_log_inode() once and once only at the end of every transaction, we * move the pinning of the inode cluster buffer to a ->precommit operation. This * matches how the xfs_iunlink_item locks the inode cluster buffer, and it * ensures that the inode cluster buffer locking is always done last in a * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode * cluster buffer. * * If we return the inode number as the precommit sort key then we'll also * guarantee that the order all inode cluster buffer locking is the same all the * inodes and unlink items in the transaction. */ static int xfs_inode_item_precommit( struct xfs_trans *tp, struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct inode *inode = VFS_I(ip); unsigned int flags = iip->ili_dirty_flags; /* * Don't bother with i_lock for the I_DIRTY_TIME check here, as races * don't matter - we either will need an extra transaction in 24 hours * to log the timestamps, or will clear already cleared fields in the * worst case. */ if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); } /* * If we're updating the inode core or the timestamps and it's possible * to upgrade this inode to bigtime format, do so now. */ if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && xfs_has_bigtime(ip->i_mount) && !xfs_inode_has_bigtime(ip)) { ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME; flags |= XFS_ILOG_CORE; } /* * Inode verifiers do not check that the extent size hint is an integer * multiple of the rt extent size on a directory with both rtinherit * and extszinherit flags set. If we're logging a directory that is * misconfigured in this way, clear the hint. */ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) { ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT); ip->i_extsize = 0; flags |= XFS_ILOG_CORE; } /* * Record the specific change for fdatasync optimisation. This allows * fdatasync to skip log forces for inodes that are only timestamp * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it * to XFS_ILOG_CORE so that the actual on-disk dirty tracking * (ili_fields) correctly tracks that the version has changed. */ spin_lock(&iip->ili_lock); iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION); if (flags & XFS_ILOG_IVERSION) flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE); if (!iip->ili_item.li_buf) { struct xfs_buf *bp; int error; /* * We hold the ILOCK here, so this inode is not going to be * flushed while we are here. Further, because there is no * buffer attached to the item, we know that there is no IO in * progress, so nothing will clear the ili_fields while we read * in the buffer. Hence we can safely drop the spin lock and * read the buffer knowing that the state will not change from * here. */ spin_unlock(&iip->ili_lock); error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp); if (error) return error; /* * We need an explicit buffer reference for the log item but * don't want the buffer to remain attached to the transaction. * Hold the buffer but release the transaction reference once * we've attached the inode log item to the buffer log item * list. */ xfs_buf_hold(bp); spin_lock(&iip->ili_lock); iip->ili_item.li_buf = bp; bp->b_flags |= _XBF_INODES; list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list); xfs_trans_brelse(tp, bp); } /* * Always OR in the bits from the ili_last_fields field. This is to * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines * in the eventual clearing of the ili_fields bits. See the big comment * in xfs_iflush() for an explanation of this coordination mechanism. */ iip->ili_fields |= (flags | iip->ili_last_fields); spin_unlock(&iip->ili_lock); xfs_inode_item_precommit_check(ip); /* * We are done with the log item transaction dirty state, so clear it so * that it doesn't pollute future transactions. */ iip->ili_dirty_flags = 0; return 0; } /* * The logged size of an inode fork is always the current size of the inode * fork. This means that when an inode fork is relogged, the size of the logged * region is determined by the current state, not the combination of the * previously logged state + the current state. This is different relogging * behaviour to most other log items which will retain the size of the * previously logged changes when smaller regions are relogged. * * Hence operations that remove data from the inode fork (e.g. shortform * dir/attr remove, extent form extent removal, etc), the size of the relogged * inode gets -smaller- rather than stays the same size as the previously logged * size and this can result in the committing transaction reducing the amount of * space being consumed by the CIL. */ STATIC void xfs_inode_item_data_fork_size( struct xfs_inode_log_item *iip, int *nvecs, int *nbytes) { struct xfs_inode *ip = iip->ili_inode; switch (ip->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_df.if_nextents > 0 && ip->i_df.if_bytes > 0) { /* worst case, doesn't subtract delalloc extents */ *nbytes += xfs_inode_data_fork_size(ip); *nvecs += 1; } break; case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { *nbytes += ip->i_df.if_broot_bytes; *nvecs += 1; } break; case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { *nbytes += xlog_calc_iovec_len(ip->i_df.if_bytes); *nvecs += 1; } break; case XFS_DINODE_FMT_DEV: break; default: ASSERT(0); break; } } STATIC void xfs_inode_item_attr_fork_size( struct xfs_inode_log_item *iip, int *nvecs, int *nbytes) { struct xfs_inode *ip = iip->ili_inode; switch (ip->i_af.if_format) { case XFS_DINODE_FMT_EXTENTS: if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_af.if_nextents > 0 && ip->i_af.if_bytes > 0) { /* worst case, doesn't subtract unused space */ *nbytes += xfs_inode_attr_fork_size(ip); *nvecs += 1; } break; case XFS_DINODE_FMT_BTREE: if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_af.if_broot_bytes > 0) { *nbytes += ip->i_af.if_broot_bytes; *nvecs += 1; } break; case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_af.if_bytes > 0) { *nbytes += xlog_calc_iovec_len(ip->i_af.if_bytes); *nvecs += 1; } break; default: ASSERT(0); break; } } /* * This returns the number of iovecs needed to log the given inode item. * * We need one iovec for the inode log format structure, one for the * inode core, and possibly one for the inode data/extents/b-tree root * and one for the inode attribute data/extents/b-tree root. */ STATIC void xfs_inode_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; *nvecs += 2; *nbytes += sizeof(struct xfs_inode_log_format) + xfs_log_dinode_size(ip->i_mount); xfs_inode_item_data_fork_size(iip, nvecs, nbytes); if (xfs_inode_has_attr_fork(ip)) xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, struct xfs_log_vec *lv, struct xfs_log_iovec **vecp) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; switch (ip->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV); if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_df.if_nextents > 0 && ip->i_df.if_bytes > 0) { struct xfs_bmbt_rec *p; ASSERT(xfs_iext_count(&ip->i_df) > 0); p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); xlog_finish_iovec(lv, *vecp, data_bytes); ASSERT(data_bytes <= ip->i_df.if_bytes); ilf->ilf_dsize = data_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV); if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, ip->i_df.if_broot, ip->i_df.if_broot_bytes); ilf->ilf_dsize = ip->i_df.if_broot_bytes; ilf->ilf_size++; } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { ASSERT(ip->i_df.if_data != NULL); ASSERT(ip->i_disk_size > 0); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, ip->i_df.if_data, ip->i_df.if_bytes); ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } break; case XFS_DINODE_FMT_DEV: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT); if (iip->ili_fields & XFS_ILOG_DEV) ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev); break; default: ASSERT(0); break; } } STATIC void xfs_inode_item_format_attr_fork( struct xfs_inode_log_item *iip, struct xfs_inode_log_format *ilf, struct xfs_log_vec *lv, struct xfs_log_iovec **vecp) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; switch (ip->i_af.if_format) { case XFS_DINODE_FMT_EXTENTS: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_af.if_nextents > 0 && ip->i_af.if_bytes > 0) { struct xfs_bmbt_rec *p; ASSERT(xfs_iext_count(&ip->i_af) == ip->i_af.if_nextents); p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); xlog_finish_iovec(lv, *vecp, data_bytes); ilf->ilf_asize = data_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } break; case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); if ((iip->ili_fields & XFS_ILOG_ABROOT) && ip->i_af.if_broot_bytes > 0) { ASSERT(ip->i_af.if_broot != NULL); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, ip->i_af.if_broot, ip->i_af.if_broot_bytes); ilf->ilf_asize = ip->i_af.if_broot_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_af.if_bytes > 0) { ASSERT(ip->i_af.if_data != NULL); xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, ip->i_af.if_data, ip->i_af.if_bytes); ilf->ilf_asize = (unsigned)ip->i_af.if_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } break; default: ASSERT(0); break; } } /* * Convert an incore timestamp to a log timestamp. Note that the log format * specifies host endian format! */ static inline xfs_log_timestamp_t xfs_inode_to_log_dinode_ts( struct xfs_inode *ip, const struct timespec64 tv) { struct xfs_log_legacy_timestamp *lits; xfs_log_timestamp_t its; if (xfs_inode_has_bigtime(ip)) return xfs_inode_encode_bigtime(tv); lits = (struct xfs_log_legacy_timestamp *)&its; lits->t_sec = tv.tv_sec; lits->t_nsec = tv.tv_nsec; return its; } /* * The legacy DMAPI fields are only present in the on-disk and in-log inodes, * but not in the in-memory one. But we are guaranteed to have an inode buffer * in memory when logging an inode, so we can just copy it from the on-disk * inode to the in-log inode here so that recovery of file system with these * fields set to non-zero values doesn't lose them. For all other cases we zero * the fields. */ static void xfs_copy_dm_fields_to_log_dinode( struct xfs_inode *ip, struct xfs_log_dinode *to) { struct xfs_dinode *dip; dip = xfs_buf_offset(ip->i_itemp->ili_item.li_buf, ip->i_imap.im_boffset); if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS)) { to->di_dmevmask = be32_to_cpu(dip->di_dmevmask); to->di_dmstate = be16_to_cpu(dip->di_dmstate); } else { to->di_dmevmask = 0; to->di_dmstate = 0; } } static inline void xfs_inode_to_log_dinode_iext_counters( struct xfs_inode *ip, struct xfs_log_dinode *to) { if (xfs_inode_has_large_extent_counts(ip)) { to->di_big_nextents = xfs_ifork_nextents(&ip->i_df); to->di_big_anextents = xfs_ifork_nextents(&ip->i_af); to->di_nrext64_pad = 0; } else { to->di_nextents = xfs_ifork_nextents(&ip->i_df); to->di_anextents = xfs_ifork_nextents(&ip->i_af); } } static void xfs_inode_to_log_dinode( struct xfs_inode *ip, struct xfs_log_dinode *to, xfs_lsn_t lsn) { struct inode *inode = VFS_I(ip); to->di_magic = XFS_DINODE_MAGIC; to->di_format = xfs_ifork_format(&ip->i_df); to->di_uid = i_uid_read(inode); to->di_gid = i_gid_read(inode); to->di_projid_lo = ip->i_projid & 0xffff; to->di_projid_hi = ip->i_projid >> 16; memset(to->di_pad3, 0, sizeof(to->di_pad3)); to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode_get_atime(inode)); to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode_get_mtime(inode)); to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode)); to->di_nlink = inode->i_nlink; to->di_gen = inode->i_generation; to->di_mode = inode->i_mode; to->di_size = ip->i_disk_size; to->di_nblocks = ip->i_nblocks; to->di_extsize = ip->i_extsize; to->di_forkoff = ip->i_forkoff; to->di_aformat = xfs_ifork_format(&ip->i_af); to->di_flags = ip->i_diflags; xfs_copy_dm_fields_to_log_dinode(ip, to); /* log a dummy value to ensure log structure is fully initialised */ to->di_next_unlinked = NULLAGINO; if (xfs_has_v3inodes(ip->i_mount)) { to->di_version = 3; to->di_changecount = inode_peek_iversion(inode); to->di_crtime = xfs_inode_to_log_dinode_ts(ip, ip->i_crtime); to->di_flags2 = ip->i_diflags2; to->di_cowextsize = ip->i_cowextsize; to->di_ino = ip->i_ino; to->di_lsn = lsn; memset(to->di_pad2, 0, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_v3_pad = 0; /* dummy value for initialisation */ to->di_crc = 0; } else { to->di_version = 2; to->di_flushiter = ip->i_flushiter; memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); } xfs_inode_to_log_dinode_iext_counters(ip, to); } /* * Format the inode core. Current timestamp data is only in the VFS inode * fields, so we need to grab them from there. Hence rather than just copying * the XFS inode core structure, format the fields directly into the iovec. */ static void xfs_inode_item_format_core( struct xfs_inode *ip, struct xfs_log_vec *lv, struct xfs_log_iovec **vecp) { struct xfs_log_dinode *dic; dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE); xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount)); } /* * This is called to fill in the vector of log iovecs for the given inode * log item. It fills the first item with an inode log format structure, * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. * * Note: Always use the 64 bit inode log format structure so we don't * leave an uninitialised hole in the format item on 64 bit systems. Log * recovery on 32 bit systems handles this just fine, so there's no reason * for not using an initialising the properly padded structure all the time. */ STATIC void xfs_inode_item_format( struct xfs_log_item *lip, struct xfs_log_vec *lv) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_log_iovec *vecp = NULL; struct xfs_inode_log_format *ilf; ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); ilf->ilf_type = XFS_LI_INODE; ilf->ilf_ino = ip->i_ino; ilf->ilf_blkno = ip->i_imap.im_blkno; ilf->ilf_len = ip->i_imap.im_len; ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ /* * make sure we don't leak uninitialised data into the log in the case * when we don't log every field in the inode. */ ilf->ilf_dsize = 0; ilf->ilf_asize = 0; ilf->ilf_pad = 0; memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u)); xlog_finish_iovec(lv, vecp, sizeof(*ilf)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); if (xfs_inode_has_attr_fork(ip)) { xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); } /* update the format with the exact fields we actually logged */ ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); } /* * This is called to pin the inode associated with the inode log * item in memory so it cannot be written out. */ STATIC void xfs_inode_item_pin( struct xfs_log_item *lip) { struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(lip->li_buf); trace_xfs_inode_pin(ip, _RET_IP_); atomic_inc(&ip->i_pincount); } /* * This is called to unpin the inode associated with the inode log * item which was previously pinned with a call to xfs_inode_item_pin(). * * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. * * Note that unpin can race with inode cluster buffer freeing marking the buffer * stale. In that case, flush completions are run from the buffer unpin call, * which may happen before the inode is unpinned. If we lose the race, there * will be no buffer attached to the log item, but the inode will be marked * XFS_ISTALE. */ STATIC void xfs_inode_item_unpin( struct xfs_log_item *lip, int remove) { struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; trace_xfs_inode_unpin(ip, _RET_IP_); ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE)); ASSERT(atomic_read(&ip->i_pincount) > 0); if (atomic_dec_and_test(&ip->i_pincount)) wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); } STATIC uint xfs_inode_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) __releases(&lip->li_ailp->ail_lock) __acquires(&lip->li_ailp->ail_lock) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (!bp || (ip->i_flags & XFS_ISTALE)) { /* * Inode item/buffer is being aborted due to cluster * buffer deletion. Trigger a log force to have that operation * completed and items removed from the AIL before the next push * attempt. */ return XFS_ITEM_PINNED; } if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; spin_unlock(&lip->li_ailp->ail_lock); /* * We need to hold a reference for flushing the cluster buffer as it may * fail the buffer without IO submission. In which case, we better get a * reference for that completion because otherwise we don't get a * reference for IO until we queue the buffer for delwri submission. */ xfs_buf_hold(bp); error = xfs_iflush_cluster(bp); if (!error) { if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_relse(bp); } else { /* * Release the buffer if we were unable to flush anything. On * any other error, the buffer has already been released. */ if (error == -EAGAIN) xfs_buf_relse(bp); rval = XFS_ITEM_LOCKED; } spin_lock(&lip->li_ailp->ail_lock); return rval; } /* * Unlock the inode associated with the inode log item. */ STATIC void xfs_inode_item_release( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; unsigned short lock_flags; ASSERT(ip->i_itemp != NULL); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) xfs_iunlock(ip, lock_flags); } /* * This is called to find out where the oldest active copy of the inode log * item in the on disk log resides now that the last log write of it completed * at the given lsn. Since we always re-log all dirty data in an inode, the * latest copy in the on disk log is the only one that matters. Therefore, * simply return the given lsn. * * If the inode has been marked stale because the cluster is being freed, we * don't want to (re-)insert this inode into the AIL. There is a race condition * where the cluster buffer may be unpinned before the inode is inserted into * the AIL during transaction committed processing. If the buffer is unpinned * before the inode item has been committed and inserted, then it is possible * for the buffer to be written and IO completes before the inode is inserted * into the AIL. In that case, we'd be inserting a clean, stale inode into the * AIL which will never get removed. It will, however, get reclaimed which * triggers an assert in xfs_inode_free() complaining about freein an inode * still in the AIL. * * To avoid this, just unpin the inode directly and return a LSN of -1 so the * transaction committed code knows that it does not need to do any further * processing on the item. */ STATIC xfs_lsn_t xfs_inode_item_committed( struct xfs_log_item *lip, xfs_lsn_t lsn) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; if (xfs_iflags_test(ip, XFS_ISTALE)) { xfs_inode_item_unpin(lip, 0); return -1; } return lsn; } STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, xfs_csn_t seq) { INODE_ITEM(lip)->ili_commit_seq = seq; return xfs_inode_item_release(lip); } static const struct xfs_item_ops xfs_inode_item_ops = { .iop_sort = xfs_inode_item_sort, .iop_precommit = xfs_inode_item_precommit, .iop_size = xfs_inode_item_size, .iop_format = xfs_inode_item_format, .iop_pin = xfs_inode_item_pin, .iop_unpin = xfs_inode_item_unpin, .iop_release = xfs_inode_item_release, .iop_committed = xfs_inode_item_committed, .iop_push = xfs_inode_item_push, .iop_committing = xfs_inode_item_committing, }; /* * Initialize the inode log item for a newly allocated (in-core) inode. */ void xfs_inode_item_init( struct xfs_inode *ip, struct xfs_mount *mp) { struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, GFP_KERNEL | __GFP_NOFAIL); iip->ili_inode = ip; spin_lock_init(&iip->ili_lock); xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, &xfs_inode_item_ops); } /* * Free the inode log item and any memory hanging off of it. */ void xfs_inode_item_destroy( struct xfs_inode *ip) { struct xfs_inode_log_item *iip = ip->i_itemp; ASSERT(iip->ili_item.li_buf == NULL); ip->i_itemp = NULL; kvfree(iip->ili_item.li_lv_shadow); kmem_cache_free(xfs_ili_cache, iip); } /* * We only want to pull the item from the AIL if it is actually there * and its location in the log has not changed since we started the * flush. Thus, we only bother if the inode's lsn has not changed. */ static void xfs_iflush_ail_updates( struct xfs_ail *ailp, struct list_head *list) { struct xfs_log_item *lip; xfs_lsn_t tail_lsn = 0; /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->ail_lock); list_for_each_entry(lip, list, li_bio_list) { xfs_lsn_t lsn; clear_bit(XFS_LI_FAILED, &lip->li_flags); if (INODE_ITEM(lip)->ili_flush_lsn != lip->li_lsn) continue; /* * dgc: Not sure how this happens, but it happens very * occassionaly via generic/388. xfs_iflush_abort() also * silently handles this same "under writeback but not in AIL at * shutdown" condition via xfs_trans_ail_delete(). */ if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { ASSERT(xlog_is_shutdown(lip->li_log)); continue; } lsn = xfs_ail_delete_one(ailp, lip); if (!tail_lsn && lsn) tail_lsn = lsn; } xfs_ail_update_finish(ailp, tail_lsn); } /* * Walk the list of inodes that have completed their IOs. If they are clean * remove them from the list and dissociate them from the buffer. Buffers that * are still dirty remain linked to the buffer and on the list. Caller must * handle them appropriately. */ static void xfs_iflush_finish( struct xfs_buf *bp, struct list_head *list) { struct xfs_log_item *lip, *n; list_for_each_entry_safe(lip, n, list, li_bio_list) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); bool drop_buffer = false; spin_lock(&iip->ili_lock); /* * Remove the reference to the cluster buffer if the inode is * clean in memory and drop the buffer reference once we've * dropped the locks we hold. */ ASSERT(iip->ili_item.li_buf == bp); if (!iip->ili_fields) { iip->ili_item.li_buf = NULL; list_del_init(&lip->li_bio_list); drop_buffer = true; } iip->ili_last_fields = 0; iip->ili_flush_lsn = 0; clear_bit(XFS_LI_FLUSHING, &lip->li_flags); spin_unlock(&iip->ili_lock); xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING); if (drop_buffer) xfs_buf_rele(bp); } } /* * Inode buffer IO completion routine. It is responsible for removing inodes * attached to the buffer from the AIL if they have not been re-logged and * completing the inode flush. */ void xfs_buf_inode_iodone( struct xfs_buf *bp) { struct xfs_log_item *lip, *n; LIST_HEAD(flushed_inodes); LIST_HEAD(ail_updates); /* * Pull the attached inodes from the buffer one at a time and take the * appropriate action on them. */ list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); if (xfs_iflags_test(iip->ili_inode, XFS_ISTALE)) { xfs_iflush_abort(iip->ili_inode); continue; } if (!iip->ili_last_fields) continue; /* Do an unlocked check for needing the AIL lock. */ if (iip->ili_flush_lsn == lip->li_lsn || test_bit(XFS_LI_FAILED, &lip->li_flags)) list_move_tail(&lip->li_bio_list, &ail_updates); else list_move_tail(&lip->li_bio_list, &flushed_inodes); } if (!list_empty(&ail_updates)) { xfs_iflush_ail_updates(bp->b_mount->m_ail, &ail_updates); list_splice_tail(&ail_updates, &flushed_inodes); } xfs_iflush_finish(bp, &flushed_inodes); if (!list_empty(&flushed_inodes)) list_splice_tail(&flushed_inodes, &bp->b_li_list); } void xfs_buf_inode_io_fail( struct xfs_buf *bp) { struct xfs_log_item *lip; list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { set_bit(XFS_LI_FAILED, &lip->li_flags); clear_bit(XFS_LI_FLUSHING, &lip->li_flags); } } /* * Clear the inode logging fields so no more flushes are attempted. If we are * on a buffer list, it is now safe to remove it because the buffer is * guaranteed to be locked. The caller will drop the reference to the buffer * the log item held. */ static void xfs_iflush_abort_clean( struct xfs_inode_log_item *iip) { iip->ili_last_fields = 0; iip->ili_fields = 0; iip->ili_fsync_fields = 0; iip->ili_flush_lsn = 0; iip->ili_item.li_buf = NULL; list_del_init(&iip->ili_item.li_bio_list); clear_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags); } /* * Abort flushing the inode from a context holding the cluster buffer locked. * * This is the normal runtime method of aborting writeback of an inode that is * attached to a cluster buffer. It occurs when the inode and the backing * cluster buffer have been freed (i.e. inode is XFS_ISTALE), or when cluster * flushing or buffer IO completion encounters a log shutdown situation. * * If we need to abort inode writeback and we don't already hold the buffer * locked, call xfs_iflush_shutdown_abort() instead as this should only ever be * necessary in a shutdown situation. */ void xfs_iflush_abort( struct xfs_inode *ip) { struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_buf *bp; if (!iip) { /* clean inode, nothing to do */ xfs_iflags_clear(ip, XFS_IFLUSHING); return; } /* * Remove the inode item from the AIL before we clear its internal * state. Whilst the inode is in the AIL, it should have a valid buffer * pointer for push operations to access - it is only safe to remove the * inode from the buffer once it has been removed from the AIL. * * We also clear the failed bit before removing the item from the AIL * as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer * references the inode item owns and needs to hold until we've fully * aborted the inode log item and detached it from the buffer. */ clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); xfs_trans_ail_delete(&iip->ili_item, 0); /* * Grab the inode buffer so can we release the reference the inode log * item holds on it. */ spin_lock(&iip->ili_lock); bp = iip->ili_item.li_buf; xfs_iflush_abort_clean(iip); spin_unlock(&iip->ili_lock); xfs_iflags_clear(ip, XFS_IFLUSHING); if (bp) xfs_buf_rele(bp); } /* * Abort an inode flush in the case of a shutdown filesystem. This can be called * from anywhere with just an inode reference and does not require holding the * inode cluster buffer locked. If the inode is attached to a cluster buffer, * it will grab and lock it safely, then abort the inode flush. */ void xfs_iflush_shutdown_abort( struct xfs_inode *ip) { struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_buf *bp; if (!iip) { /* clean inode, nothing to do */ xfs_iflags_clear(ip, XFS_IFLUSHING); return; } spin_lock(&iip->ili_lock); bp = iip->ili_item.li_buf; if (!bp) { spin_unlock(&iip->ili_lock); xfs_iflush_abort(ip); return; } /* * We have to take a reference to the buffer so that it doesn't get * freed when we drop the ili_lock and then wait to lock the buffer. * We'll clean up the extra reference after we pick up the ili_lock * again. */ xfs_buf_hold(bp); spin_unlock(&iip->ili_lock); xfs_buf_lock(bp); spin_lock(&iip->ili_lock); if (!iip->ili_item.li_buf) { /* * Raced with another removal, hold the only reference * to bp now. Inode should not be in the AIL now, so just clean * up and return; */ ASSERT(list_empty(&iip->ili_item.li_bio_list)); ASSERT(!test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)); xfs_iflush_abort_clean(iip); spin_unlock(&iip->ili_lock); xfs_iflags_clear(ip, XFS_IFLUSHING); xfs_buf_relse(bp); return; } /* * Got two references to bp. The first will get dropped by * xfs_iflush_abort() when the item is removed from the buffer list, but * we can't drop our reference until _abort() returns because we have to * unlock the buffer as well. Hence we abort and then unlock and release * our reference to the buffer. */ ASSERT(iip->ili_item.li_buf == bp); spin_unlock(&iip->ili_lock); xfs_iflush_abort(ip); xfs_buf_relse(bp); } /* * convert an xfs_inode_log_format struct from the old 32 bit version * (which can have different field alignments) to the native 64 bit version */ int xfs_inode_item_format_convert( struct xfs_log_iovec *buf, struct xfs_inode_log_format *in_f) { struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; if (buf->i_len != sizeof(*in_f32)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); return -EFSCORRUPTED; } in_f->ilf_type = in_f32->ilf_type; in_f->ilf_size = in_f32->ilf_size; in_f->ilf_fields = in_f32->ilf_fields; in_f->ilf_asize = in_f32->ilf_asize; in_f->ilf_dsize = in_f32->ilf_dsize; in_f->ilf_ino = in_f32->ilf_ino; memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u)); in_f->ilf_blkno = in_f32->ilf_blkno; in_f->ilf_len = in_f32->ilf_len; in_f->ilf_boffset = in_f32->ilf_boffset; return 0; }
12 230 105 257 256 265 10 14 14 405 367 257 380 297 294 206 204 68 272 66 31 5 22 276 202 123 74 257 32 10 1 15 2 1 59 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 /* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. */ #include <linux/kernel.h> #include <linux/uuid.h> #include <linux/fs.h> #include <linux/fsverity.h> #include <linux/namei.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include "ovl_entry.h" #undef pr_fmt #define pr_fmt(fmt) "overlayfs: " fmt enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), __OVL_PATH_ORIGIN = (1 << 2), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) #define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) #define OVL_XATTR_NAMESPACE "overlay." #define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) #define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_USER_PREFIX_LEN (sizeof(OVL_XATTR_USER_PREFIX) - 1) #define OVL_XATTR_ESCAPE_PREFIX OVL_XATTR_NAMESPACE #define OVL_XATTR_ESCAPE_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_PREFIX) - 1) #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX OVL_XATTR_TRUSTED_PREFIX OVL_XATTR_ESCAPE_PREFIX #define OVL_XATTR_ESCAPE_TRUSTED_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_TRUSTED_PREFIX) - 1) #define OVL_XATTR_ESCAPE_USER_PREFIX OVL_XATTR_USER_PREFIX OVL_XATTR_ESCAPE_PREFIX #define OVL_XATTR_ESCAPE_USER_PREFIX_LEN (sizeof(OVL_XATTR_ESCAPE_USER_PREFIX) - 1) enum ovl_xattr { OVL_XATTR_OPAQUE, OVL_XATTR_REDIRECT, OVL_XATTR_ORIGIN, OVL_XATTR_IMPURE, OVL_XATTR_NLINK, OVL_XATTR_UPPER, OVL_XATTR_UUID, OVL_XATTR_METACOPY, OVL_XATTR_PROTATTR, OVL_XATTR_XWHITEOUT, }; enum ovl_inode_flag { /* Pure upper dir that may contain non pure upper entries */ OVL_IMPURE, /* Non-merge dir that may contain whiteout entries */ OVL_WHITEOUTS, OVL_INDEX, OVL_UPPERDATA, /* Inode number will remain constant over copy up. */ OVL_CONST_INO, OVL_HAS_DIGEST, OVL_VERIFIED_DIGEST, }; enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, OVL_E_CONNECTED, /* Lower stack may contain xwhiteout entries */ OVL_E_XWHITEOUTS, }; enum { OVL_REDIRECT_OFF, /* "off" mode is never used. In effect */ OVL_REDIRECT_FOLLOW, /* ...it translates to either "follow" */ OVL_REDIRECT_NOFOLLOW, /* ...or "nofollow". */ OVL_REDIRECT_ON, }; enum { OVL_UUID_OFF, OVL_UUID_NULL, OVL_UUID_AUTO, OVL_UUID_ON, }; enum { OVL_XINO_OFF, OVL_XINO_AUTO, OVL_XINO_ON, }; enum { OVL_VERITY_OFF, OVL_VERITY_ON, OVL_VERITY_REQUIRE, }; /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: * origin.fh - exported file handle of the lower file * origin.uuid - uuid of the lower filesystem */ #define OVL_FH_VERSION 0 #define OVL_FH_MAGIC 0xfb /* CPU byte order required for fid decoding: */ #define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) #define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) /* Is the real inode encoded in fid an upper inode? */ #define OVL_FH_FLAG_PATH_UPPER (1 << 2) #define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ OVL_FH_FLAG_PATH_UPPER) #if defined(__LITTLE_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN 0 #elif defined(__BIG_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN #else #error Endianness not defined #endif /* The type used to be returned by overlay exportfs for misaligned fid */ #define OVL_FILEID_V0 0xfb /* The type returned by overlay exportfs for 32bit aligned fid */ #define OVL_FILEID_V1 0xf8 /* On-disk format for "origin" file handle */ struct ovl_fb { u8 version; /* 0 */ u8 magic; /* 0xfb */ u8 len; /* size of this header + size of fid */ u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ uuid_t uuid; /* uuid of filesystem */ u32 fid[]; /* file identifier should be 32bit aligned in-memory */ } __packed; /* In-memory and on-wire format for overlay file handle */ struct ovl_fh { u8 padding[3]; /* make sure fb.fid is 32bit aligned */ union { struct ovl_fb fb; DECLARE_FLEX_ARRAY(u8, buf); }; } __packed; #define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb) #define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len) #define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \ offsetof(struct ovl_fb, fid)) /* On-disk format for "metacopy" xattr (if non-zero size) */ struct ovl_metacopy { u8 version; /* 0 */ u8 len; /* size of this header + used digest bytes */ u8 flags; u8 digest_algo; /* FS_VERITY_HASH_ALG_* constant, 0 for no digest */ u8 digest[FS_VERITY_MAX_DIGEST_SIZE]; /* Only the used part on disk */ } __packed; #define OVL_METACOPY_MAX_SIZE (sizeof(struct ovl_metacopy)) #define OVL_METACOPY_MIN_SIZE (OVL_METACOPY_MAX_SIZE - FS_VERITY_MAX_DIGEST_SIZE) #define OVL_METACOPY_INIT { 0, OVL_METACOPY_MIN_SIZE } static inline int ovl_metadata_digest_size(const struct ovl_metacopy *metacopy) { if (metacopy->len < OVL_METACOPY_MIN_SIZE) return 0; return (int)metacopy->len - OVL_METACOPY_MIN_SIZE; } /* No atime modification on underlying */ #define OVL_OPEN_FLAGS (O_NOATIME) extern const char *const ovl_xattr_table[][2]; static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox) { return ovl_xattr_table[ox][ofs->config.userxattr]; } /* * When changing ownership of an upper object map the intended ownership * according to the upper layer's idmapping. When an upper mount idmaps files * that are stored on-disk as owned by id 1001 to id 1000 this means stat on * this object will report it as being owned by id 1000 when calling stat via * the upper mount. * In order to change ownership of an object so stat reports id 1000 when * called on an idmapped upper mount the value written to disk - i.e., the * value stored in ia_*id - must 1001. The mount mapping helper will thus take * care to map 1000 to 1001. * The mnt idmapping helpers are nops if the upper layer isn't idmapped. */ static inline int ovl_do_notify_change(struct ovl_fs *ofs, struct dentry *upperdentry, struct iattr *attr) { return notify_change(ovl_upper_mnt_idmap(ofs), upperdentry, attr, NULL); } static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; } static inline int ovl_do_unlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_unlink(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("unlink(%pd2) = %i\n", dentry, err); return err; } static inline int ovl_do_link(struct ovl_fs *ofs, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { int err = vfs_link(old_dentry, ovl_upper_mnt_idmap(ofs), dir, new_dentry, NULL); pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err); return err; } static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } static inline int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); return err; } static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; } static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; } static inline ssize_t ovl_do_getxattr(const struct path *path, const char *name, void *value, size_t size) { int err, len; WARN_ON(path->dentry->d_sb != path->mnt->mnt_sb); err = vfs_getxattr(mnt_idmap(path->mnt), path->dentry, name, value, size); len = (value && err > 0) ? err : 0; pr_debug("getxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n", path->dentry, name, min(len, 48), value, size, err); return err; } static inline ssize_t ovl_getxattr_upper(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, void *value, size_t size) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_do_getxattr(&upperpath, ovl_xattr(ofs, ox), value, size); } static inline ssize_t ovl_path_getxattr(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox, void *value, size_t size) { return ovl_do_getxattr(path, ovl_xattr(ofs, ox), value, size); } static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(ovl_upper_mnt_idmap(ofs), dentry, name, value, size, flags); pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n", dentry, name, min((int)size, 48), value, size, flags, err); return err; } static inline int ovl_setxattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, const void *value, size_t size) { return ovl_do_setxattr(ofs, dentry, ovl_xattr(ofs, ox), value, size, 0); } static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry, const char *name) { int err = vfs_removexattr(ovl_upper_mnt_idmap(ofs), dentry, name); pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); return err; } static inline int ovl_removexattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox) { return ovl_do_removexattr(ofs, dentry, ovl_xattr(ofs, ox)); } static inline int ovl_do_set_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name, struct posix_acl *acl) { return vfs_set_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name, acl); } static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry, const char *acl_name) { return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name); } static inline int ovl_do_rename(struct ovl_fs *ofs, struct inode *olddir, struct dentry *olddentry, struct inode *newdir, struct dentry *newdentry, unsigned int flags) { int err; struct renamedata rd = { .old_mnt_idmap = ovl_upper_mnt_idmap(ofs), .old_dir = olddir, .old_dentry = olddentry, .new_mnt_idmap = ovl_upper_mnt_idmap(ofs), .new_dir = newdir, .new_dentry = newdentry, .flags = flags, }; pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); err = vfs_rename(&rd); if (err) { pr_debug("...rename(%pd2, %pd2, ...) = %i\n", olddentry, newdentry, err); } return err; } static inline int ovl_do_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { int err = vfs_whiteout(ovl_upper_mnt_idmap(ofs), dir, dentry); pr_debug("whiteout(%pd2) = %i\n", dentry, err); return err; } static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry }; struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode, O_LARGEFILE | O_WRONLY, current_cred()); int err = PTR_ERR_OR_ZERO(file); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); return file; } static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, const char *name, struct dentry *base, int len) { return lookup_one(ovl_upper_mnt_idmap(ofs), name, base, len); } static inline bool ovl_open_flags_need_copy_up(int flags) { if (!flags) return false; return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { if (flags & AT_GETATTR_NOSEC) return vfs_getattr_nosec(path, stat, request_mask, flags); return vfs_getattr(path, stat, request_mask, flags); } /* util.c */ int ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); void ovl_start_write(struct dentry *dentry); void ovl_end_write(struct dentry *dentry); int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); static inline const struct cred *ovl_creds(struct super_block *sb) { return OVL_FS(sb)->creator_cred; } int ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_path *ovl_stack_alloc(unsigned int n); void ovl_stack_cpy(struct ovl_path *dst, struct ovl_path *src, unsigned int n); void ovl_stack_put(struct ovl_path *stack, unsigned int n); void ovl_stack_free(struct ovl_path *stack, unsigned int n); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); void ovl_free_entry(struct ovl_entry *oe); bool ovl_dentry_remote(struct dentry *dentry); void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry); void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry, struct ovl_entry *oe); void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, struct ovl_entry *oe, unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); void ovl_path_lowerdata(struct dentry *dentry, struct path *path); struct inode *ovl_i_path_real(struct inode *inode, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); int ovl_dentry_set_lowerdata(struct dentry *dentry, struct ovl_path *datapath); const struct ovl_layer *ovl_i_layer_lower(struct inode *inode); const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_lowerdata(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); struct inode *ovl_inode_realdata(struct inode *inode); const char *ovl_lowerdata_redirect(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct inode *inode); void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache); void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry); void ovl_dentry_clear_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_dentry_has_xwhiteouts(struct dentry *dentry); void ovl_dentry_set_xwhiteouts(struct dentry *dentry); void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs, const struct ovl_layer *layer); bool ovl_dentry_has_upper_alias(struct dentry *dentry); void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags); bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags); bool ovl_has_upperdata(struct inode *inode); void ovl_set_upperdata(struct inode *inode); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dir_modified(struct dentry *dentry, bool impurity); u64 ovl_inode_version_get(struct inode *inode); bool ovl_is_whiteout(struct dentry *dentry); bool ovl_path_is_whiteout(struct ovl_fs *ofs, const struct path *path); struct file *ovl_path_open(const struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry, int flags); void ovl_copy_up_end(struct dentry *dentry); bool ovl_already_copied_up(struct dentry *dentry, int flags); char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path, enum ovl_xattr ox); bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path); bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs, const struct path *upperpath); static inline bool ovl_upper_is_whiteout(struct ovl_fs *ofs, struct dentry *upperdentry) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_path_is_whiteout(ofs, &upperpath); } static inline bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *upperdentry) { struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_path_check_origin_xattr(ofs, &upperpath); } int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); bool ovl_is_inuse(struct dentry *dentry); bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path, struct ovl_metacopy *data); int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, struct ovl_metacopy *metacopy); bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); int ovl_ensure_verity_loaded(struct path *path); int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, u8 *digest_buf, int *buf_length); int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath, struct path *datapath); int ovl_get_verity_digest(struct ovl_fs *ofs, struct path *src, struct ovl_metacopy *metacopy); int ovl_sync_status(struct ovl_fs *ofs); static inline void ovl_set_flag(unsigned long flag, struct inode *inode) { set_bit(flag, &OVL_I(inode)->flags); } static inline void ovl_clear_flag(unsigned long flag, struct inode *inode) { clear_bit(flag, &OVL_I(inode)->flags); } static inline bool ovl_test_flag(unsigned long flag, struct inode *inode) { return test_bit(flag, &OVL_I(inode)->flags); } static inline bool ovl_is_impuredir(struct super_block *sb, struct dentry *upperdentry) { struct ovl_fs *ofs = OVL_FS(sb); struct path upperpath = { .dentry = upperdentry, .mnt = ovl_upper_mnt(ofs), }; return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y'; } static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs, const struct path *path) { return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE); } static inline bool ovl_redirect_follow(struct ovl_fs *ofs) { return ofs->config.redirect_mode != OVL_REDIRECT_NOFOLLOW; } static inline bool ovl_redirect_dir(struct ovl_fs *ofs) { return ofs->config.redirect_mode == OVL_REDIRECT_ON; } static inline bool ovl_origin_uuid(struct ovl_fs *ofs) { return ofs->config.uuid != OVL_UUID_OFF; } static inline bool ovl_has_fsid(struct ovl_fs *ofs) { return ofs->config.uuid == OVL_UUID_ON || ofs->config.uuid == OVL_UUID_AUTO; } /* * With xino=auto, we do best effort to keep all inodes on same st_dev and * d_ino consistent with st_ino. * With xino=on, we do the same effort but we warn if we failed. */ static inline bool ovl_xino_warn(struct ovl_fs *ofs) { return ofs->config.xino == OVL_XINO_ON; } /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used. */ static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) { return (!ofs->config.index && !ofs->config.metacopy && !ovl_redirect_dir(ofs) && !ovl_xino_warn(ofs)); } /* All layers on same fs? */ static inline bool ovl_same_fs(struct ovl_fs *ofs) { return ofs->xino_mode == 0; } /* All overlay inodes have same st_dev? */ static inline bool ovl_same_dev(struct ovl_fs *ofs) { return ofs->xino_mode >= 0; } static inline unsigned int ovl_xino_bits(struct ovl_fs *ofs) { return ovl_same_dev(ofs) ? ofs->xino_mode : 0; } static inline void ovl_inode_lock(struct inode *inode) { mutex_lock(&OVL_I(inode)->lock); } static inline int ovl_inode_lock_interruptible(struct inode *inode) { return mutex_lock_interruptible(&OVL_I(inode)->lock); } static inline void ovl_inode_unlock(struct inode *inode) { mutex_unlock(&OVL_I(inode)->lock); } /* namei.c */ int ovl_check_fb_len(struct ovl_fb *fb, int fb_len); static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len) { if (fh_len < sizeof(struct ovl_fh)) return -EINVAL; return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET); } struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, struct vfsmount *mnt, bool connected); int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, const struct ovl_fh *fh, bool is_upper, bool set); int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, enum ovl_xattr ox, struct dentry *real, bool is_upper, bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool verify); int ovl_path_next(int idx, struct dentry *dentry, struct path *path, const struct ovl_layer **layer); int ovl_verify_lowerdata(struct dentry *dentry); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, const struct ovl_fh *fh, bool set) { return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); } static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, true, set); } /* readdir.c */ extern const struct file_operations ovl_dir_operations; struct file *ovl_dir_real_file(const struct file *file, bool want_upper); int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper, struct list_head *list); void ovl_cache_free(struct list_head *list); void ovl_dir_cache_free(struct inode *inode); int ovl_check_d_type_supported(const struct path *realpath); int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); int ovl_indexdir_cleanup(struct ovl_fs *ofs); /* * Can we iterate real dir directly? * * Non-merge dir may contain whiteouts from a time it was a merge upper, before * lower dir was removed under it and possibly before it was rotated from upper * to lower layer. */ static inline bool ovl_dir_is_real(struct inode *dir) { return !ovl_test_flag(OVL_WHITEOUTS, dir); } /* inode.c */ int ovl_set_nlink_upper(struct dentry *dentry); int ovl_set_nlink_lower(struct dentry *dentry); unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback); int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu, bool noperm); static inline struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, bool rcu) { return do_ovl_get_acl(&nop_mnt_idmap, inode, type, rcu, true); } static inline struct posix_acl *ovl_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, int type) { return do_ovl_get_acl(idmap, d_inode(dentry), type, false, false); } int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm); #else #define ovl_get_inode_acl NULL #define ovl_get_acl NULL #define ovl_set_acl NULL static inline struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm) { return NULL; } #endif int ovl_update_time(struct inode *inode, int flags); bool ovl_is_private_xattr(struct super_block *sb, const char *name); struct ovl_inode_params { struct inode *newinode; struct dentry *upperdentry; struct ovl_entry *oe; bool index; char *redirect; char *lowerdata_redirect; }; void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, unsigned long ino, int fsid); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir); struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir); struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip); void ovl_copyattr(struct inode *to); /* vfs inode flags copied from real to ovl inode */ #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) /* vfs inode flags read from overlay.protattr xattr to ovl inode */ #define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) /* * fileattr flags copied from lower to upper inode on copy up. * We cannot copy up immutable/append-only flags, because that would prevent * linking temp inode to upper dir, so we store them in xattr instead. */ #define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) #define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) #define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL) #define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE) void ovl_check_protattr(struct inode *inode, struct dentry *upper); int ovl_set_protattr(struct inode *inode, struct dentry *upper, struct fileattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { unsigned int mask = OVL_COPY_I_FLAGS_MASK; inode_set_flags(to, from->i_flags & mask, mask); } /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry); struct ovl_cattr { dev_t rdev; umode_t mode; const char *link; struct dentry *hardlink; }; #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry); struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir); struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr); /* file.c */ extern const struct file_operations ovl_file_operations; int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa); int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); int ovl_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_with_data(struct dentry *dentry); int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new); int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; extern const struct export_operations ovl_export_fid_operations; /* super.c */ int ovl_fill_super(struct super_block *sb, struct fs_context *fc); /* Will this overlay be forced to mount/remount ro? */ static inline bool ovl_force_readonly(struct ovl_fs *ofs) { return (!ovl_upper_mnt(ofs) || !ofs->workdir); } /* xattr.c */ const struct xattr_handler * const *ovl_xattr_handlers(struct ovl_fs *ofs); int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
2564 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM bpf_trace #if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BPF_TRACE_H #include <linux/tracepoint.h> TRACE_EVENT(bpf_trace_printk, TP_PROTO(const char *bpf_string), TP_ARGS(bpf_string), TP_STRUCT__entry( __string(bpf_string, bpf_string) ), TP_fast_assign( __assign_str(bpf_string); ), TP_printk("%s", __get_str(bpf_string)) ); #endif /* _TRACE_BPF_TRACE_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE bpf_trace #include <trace/define_trace.h>
91 95 88 35 94 95 95 45 3 24 24 24 21 24 24 24 14 33 10 3 3 3 3 3 24 24 19 5 24 24 24 24 24 6 24 24 24 24 5 5 4 4 4 105 25 7 38 7 9 7 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 // SPDX-License-Identifier: GPL-2.0 /* * file.c - part of debugfs, a tiny little debug file system * * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. * * debugfs is for people to use instead of /proc or /sys. * See Documentation/filesystems/ for more details. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/io.h> #include <linux/slab.h> #include <linux/atomic.h> #include <linux/device.h> #include <linux/pm_runtime.h> #include <linux/poll.h> #include <linux/security.h> #include "internal.h" struct poll_table_struct; static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return 0; } static ssize_t default_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return count; } const struct file_operations debugfs_noop_file_operations = { .read = default_read_file, .write = default_write_file, .open = simple_open, .llseek = noop_llseek, }; #define F_DENTRY(filp) ((filp)->f_path.dentry) const struct file_operations *debugfs_real_fops(const struct file *filp) { struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) { /* * Urgh, we've been called w/o a protecting * debugfs_file_get(). */ WARN_ON(1); return NULL; } return fsd->real_fops; } EXPORT_SYMBOL_GPL(debugfs_real_fops); /** * debugfs_file_get - mark the beginning of file data access * @dentry: the dentry object whose data is being accessed. * * Up to a matching call to debugfs_file_put(), any successive call * into the file removing functions debugfs_remove() and * debugfs_remove_recursive() will block. Since associated private * file data may only get freed after a successful return of any of * the removal functions, you may safely access it after a successful * call to debugfs_file_get() without worrying about lifetime issues. * * If -%EIO is returned, the file has already been removed and thus, * it is not safe to access any of its data. If, on the other hand, * it is allowed to access the file data, zero is returned. */ int debugfs_file_get(struct dentry *dentry) { struct debugfs_fsdata *fsd; void *d_fsd; /* * This could only happen if some debugfs user erroneously calls * debugfs_file_get() on a dentry that isn't even a file, let * them know about it. */ if (WARN_ON(!d_is_reg(dentry))) return -EINVAL; d_fsd = READ_ONCE(dentry->d_fsdata); if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { fsd = d_fsd; } else { fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); if (!fsd) return -ENOMEM; fsd->real_fops = (void *)((unsigned long)d_fsd & ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); refcount_set(&fsd->active_users, 1); init_completion(&fsd->active_users_drained); INIT_LIST_HEAD(&fsd->cancellations); mutex_init(&fsd->cancellations_mtx); if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) { mutex_destroy(&fsd->cancellations_mtx); kfree(fsd); fsd = READ_ONCE(dentry->d_fsdata); } } /* * In case of a successful cmpxchg() above, this check is * strictly necessary and must follow it, see the comment in * __debugfs_remove_file(). * OTOH, if the cmpxchg() hasn't been executed or wasn't * successful, this serves the purpose of not starving * removers. */ if (d_unlinked(dentry)) return -EIO; if (!refcount_inc_not_zero(&fsd->active_users)) return -EIO; return 0; } EXPORT_SYMBOL_GPL(debugfs_file_get); /** * debugfs_file_put - mark the end of file data access * @dentry: the dentry object formerly passed to * debugfs_file_get(). * * Allow any ongoing concurrent call into debugfs_remove() or * debugfs_remove_recursive() blocked by a former call to * debugfs_file_get() to proceed and return to its caller. */ void debugfs_file_put(struct dentry *dentry) { struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); if (refcount_dec_and_test(&fsd->active_users)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_file_put); /** * debugfs_enter_cancellation - enter a debugfs cancellation * @file: the file being accessed * @cancellation: the cancellation object, the cancel callback * inside of it must be initialized * * When a debugfs file is removed it needs to wait for all active * operations to complete. However, the operation itself may need * to wait for hardware or completion of some asynchronous process * or similar. As such, it may need to be cancelled to avoid long * waits or even deadlocks. * * This function can be used inside a debugfs handler that may * need to be cancelled. As soon as this function is called, the * cancellation's 'cancel' callback may be called, at which point * the caller should proceed to call debugfs_leave_cancellation() * and leave the debugfs handler function as soon as possible. * Note that the 'cancel' callback is only ever called in the * context of some kind of debugfs_remove(). * * This function must be paired with debugfs_leave_cancellation(). */ void debugfs_enter_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); INIT_LIST_HEAD(&cancellation->list); if (WARN_ON(!d_is_reg(dentry))) return; if (WARN_ON(!cancellation->cancel)) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd || ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return; mutex_lock(&fsd->cancellations_mtx); list_add(&cancellation->list, &fsd->cancellations); mutex_unlock(&fsd->cancellations_mtx); /* if we're already removing wake it up to cancel */ if (d_unlinked(dentry)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_enter_cancellation); /** * debugfs_leave_cancellation - leave cancellation section * @file: the file being accessed * @cancellation: the cancellation previously registered with * debugfs_enter_cancellation() * * See the documentation of debugfs_enter_cancellation(). */ void debugfs_leave_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); if (WARN_ON(!d_is_reg(dentry))) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd || ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))) return; mutex_lock(&fsd->cancellations_mtx); if (!list_empty(&cancellation->list)) list_del(&cancellation->list); mutex_unlock(&fsd->cancellations_mtx); } EXPORT_SYMBOL_GPL(debugfs_leave_cancellation); /* * Only permit access to world-readable files when the kernel is locked down. * We also need to exclude any file that has ways to write or alter it as root * can bypass the permissions check. */ static int debugfs_locked_down(struct inode *inode, struct file *filp, const struct file_operations *real_fops) { if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && !real_fops->mmap) return 0; if (security_locked_down(LOCKDOWN_DEBUGFS)) return -EPERM; return 0; } static int open_proxy_open(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = NULL; int r; r = debugfs_file_get(dentry); if (r) return r == -EIO ? -ENOENT : r; real_fops = debugfs_real_fops(filp); r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { r = -ENXIO; goto out; } #endif /* Huh? Module did not clean up after itself at exit? */ WARN(1, "debugfs file owner did not clean up at exit: %pd", dentry); r = -ENXIO; goto out; } replace_fops(filp, real_fops); if (real_fops->open) r = real_fops->open(inode, filp); out: debugfs_file_put(dentry); return r; } const struct file_operations debugfs_open_proxy_file_operations = { .open = open_proxy_open, }; #define PROTO(args...) args #define ARGS(args...) args #define FULL_PROXY_FUNC(name, ret_type, filp, proto, args) \ static ret_type full_proxy_ ## name(proto) \ { \ struct dentry *dentry = F_DENTRY(filp); \ const struct file_operations *real_fops; \ ret_type r; \ \ r = debugfs_file_get(dentry); \ if (unlikely(r)) \ return r; \ real_fops = debugfs_real_fops(filp); \ r = real_fops->name(args); \ debugfs_file_put(dentry); \ return r; \ } FULL_PROXY_FUNC(llseek, loff_t, filp, PROTO(struct file *filp, loff_t offset, int whence), ARGS(filp, offset, whence)); FULL_PROXY_FUNC(read, ssize_t, filp, PROTO(struct file *filp, char __user *buf, size_t size, loff_t *ppos), ARGS(filp, buf, size, ppos)); FULL_PROXY_FUNC(write, ssize_t, filp, PROTO(struct file *filp, const char __user *buf, size_t size, loff_t *ppos), ARGS(filp, buf, size, ppos)); FULL_PROXY_FUNC(unlocked_ioctl, long, filp, PROTO(struct file *filp, unsigned int cmd, unsigned long arg), ARGS(filp, cmd, arg)); static __poll_t full_proxy_poll(struct file *filp, struct poll_table_struct *wait) { struct dentry *dentry = F_DENTRY(filp); __poll_t r = 0; const struct file_operations *real_fops; if (debugfs_file_get(dentry)) return EPOLLHUP; real_fops = debugfs_real_fops(filp); r = real_fops->poll(filp, wait); debugfs_file_put(dentry); return r; } static int full_proxy_release(struct inode *inode, struct file *filp) { const struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = debugfs_real_fops(filp); const struct file_operations *proxy_fops = filp->f_op; int r = 0; /* * We must not protect this against removal races here: the * original releaser should be called unconditionally in order * not to leak any resources. Releasers must not assume that * ->i_private is still being meaningful here. */ if (real_fops->release) r = real_fops->release(inode, filp); replace_fops(filp, d_inode(dentry)->i_fop); kfree(proxy_fops); fops_put(real_fops); return r; } static void __full_proxy_fops_init(struct file_operations *proxy_fops, const struct file_operations *real_fops) { proxy_fops->release = full_proxy_release; if (real_fops->llseek) proxy_fops->llseek = full_proxy_llseek; if (real_fops->read) proxy_fops->read = full_proxy_read; if (real_fops->write) proxy_fops->write = full_proxy_write; if (real_fops->poll) proxy_fops->poll = full_proxy_poll; if (real_fops->unlocked_ioctl) proxy_fops->unlocked_ioctl = full_proxy_unlocked_ioctl; } static int full_proxy_open(struct inode *inode, struct file *filp) { struct dentry *dentry = F_DENTRY(filp); const struct file_operations *real_fops = NULL; struct file_operations *proxy_fops = NULL; int r; r = debugfs_file_get(dentry); if (r) return r == -EIO ? -ENOENT : r; real_fops = debugfs_real_fops(filp); r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; if (!fops_get(real_fops)) { #ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) { r = -ENXIO; goto out; } #endif /* Huh? Module did not cleanup after itself at exit? */ WARN(1, "debugfs file owner did not clean up at exit: %pd", dentry); r = -ENXIO; goto out; } proxy_fops = kzalloc(sizeof(*proxy_fops), GFP_KERNEL); if (!proxy_fops) { r = -ENOMEM; goto free_proxy; } __full_proxy_fops_init(proxy_fops, real_fops); replace_fops(filp, proxy_fops); if (real_fops->open) { r = real_fops->open(inode, filp); if (r) { replace_fops(filp, d_inode(dentry)->i_fop); goto free_proxy; } else if (filp->f_op != proxy_fops) { /* No protection against file removal anymore. */ WARN(1, "debugfs file owner replaced proxy fops: %pd", dentry); goto free_proxy; } } goto out; free_proxy: kfree(proxy_fops); fops_put(real_fops); out: debugfs_file_put(dentry); return r; } const struct file_operations debugfs_full_proxy_file_operations = { .open = full_proxy_open, }; ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; ret = simple_attr_read(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } EXPORT_SYMBOL_GPL(debugfs_attr_read); static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, size_t len, loff_t *ppos, bool is_signed) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; if (is_signed) ret = simple_attr_write_signed(file, buf, len, ppos); else ret = simple_attr_write(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return debugfs_attr_write_xsigned(file, buf, len, ppos, false); } EXPORT_SYMBOL_GPL(debugfs_attr_write); ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return debugfs_attr_write_xsigned(file, buf, len, ppos, true); } EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, const struct file_operations *fops_ro, const struct file_operations *fops_wo) { /* if there are no write bits set, make read only */ if (!(mode & S_IWUGO)) return debugfs_create_file_unsafe(name, mode, parent, value, fops_ro); /* if there are no read bits set, make write only */ if (!(mode & S_IRUGO)) return debugfs_create_file_unsafe(name, mode, parent, value, fops_wo); return debugfs_create_file_unsafe(name, mode, parent, value, fops); } static int debugfs_u8_set(void *data, u64 val) { *(u8 *)data = val; return 0; } static int debugfs_u8_get(void *data, u64 *val) { *val = *(u8 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); /** * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u8, &fops_u8_ro, &fops_u8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u8); static int debugfs_u16_set(void *data, u64 val) { *(u16 *)data = val; return 0; } static int debugfs_u16_get(void *data, u64 *val) { *val = *(u16 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); /** * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u16, &fops_u16_ro, &fops_u16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u16); static int debugfs_u32_set(void *data, u64 val) { *(u32 *)data = val; return 0; } static int debugfs_u32_get(void *data, u64 *val) { *val = *(u32 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); /** * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32, &fops_u32_ro, &fops_u32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u32); static int debugfs_u64_set(void *data, u64 val) { *(u64 *)data = val; return 0; } static int debugfs_u64_get(void *data, u64 *val) { *val = *(u64 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); /** * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u64, &fops_u64_ro, &fops_u64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u64); static int debugfs_ulong_set(void *data, u64 val) { *(unsigned long *)data = val; return 0; } static int debugfs_ulong_get(void *data, u64 *val) { *val = *(unsigned long *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); /** * debugfs_create_ulong - create a debugfs file that is used to read and write * an unsigned long value. * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_ulong, &fops_ulong_ro, &fops_ulong_wo); } EXPORT_SYMBOL_GPL(debugfs_create_ulong); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n"); /* * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value * * These functions are exactly the same as the above functions (but use a hex * output for the decimal challenged). For details look at the above unsigned * decimal functions. */ /** * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x8, &fops_x8_ro, &fops_x8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x8); /** * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x16, &fops_x16_ro, &fops_x16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x16); /** * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x32, &fops_x32_ro, &fops_x32_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x32); /** * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x64, &fops_x64_ro, &fops_x64_wo); } EXPORT_SYMBOL_GPL(debugfs_create_x64); static int debugfs_size_t_set(void *data, u64 val) { *(size_t *)data = val; return 0; } static int debugfs_size_t_get(void *data, u64 *val) { *val = *(size_t *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set, "%llu\n"); /* %llu and %zu are more or less the same */ DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n"); /** * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_size_t, &fops_size_t_ro, &fops_size_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_size_t); static int debugfs_atomic_t_set(void *data, u64 val) { atomic_set((atomic_t *)data, val); return 0; } static int debugfs_atomic_t_get(void *data, u64 *val) { *val = atomic_read((atomic_t *)data); return 0; } DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get, debugfs_atomic_t_set, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); /** * debugfs_create_atomic_t - create a debugfs file that is used to read and * write an atomic_t value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. */ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_atomic_t, &fops_atomic_t_ro, &fops_atomic_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[2]; bool val; int r; struct dentry *dentry = F_DENTRY(file); r = debugfs_file_get(dentry); if (unlikely(r)) return r; val = *(bool *)file->private_data; debugfs_file_put(dentry); if (val) buf[0] = 'Y'; else buf[0] = 'N'; buf[1] = '\n'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } EXPORT_SYMBOL_GPL(debugfs_read_file_bool); ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { bool bv; int r; bool *val = file->private_data; struct dentry *dentry = F_DENTRY(file); r = kstrtobool_from_user(user_buf, count, &bv); if (!r) { r = debugfs_file_get(dentry); if (unlikely(r)) return r; *val = bv; debugfs_file_put(dentry); } return count; } EXPORT_SYMBOL_GPL(debugfs_write_file_bool); static const struct file_operations fops_bool = { .read = debugfs_read_file_bool, .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_ro = { .read = debugfs_read_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_wo = { .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_bool, &fops_bool_ro, &fops_bool_wo); } EXPORT_SYMBOL_GPL(debugfs_create_bool); ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); char *str, *copy = NULL; int copy_len, len; ssize_t ret; ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; str = *(char **)file->private_data; len = strlen(str) + 1; copy = kmalloc(len, GFP_KERNEL); if (!copy) { debugfs_file_put(dentry); return -ENOMEM; } copy_len = strscpy(copy, str, len); debugfs_file_put(dentry); if (copy_len < 0) { kfree(copy); return copy_len; } copy[copy_len] = '\n'; ret = simple_read_from_buffer(user_buf, count, ppos, copy, len); kfree(copy); return ret; } EXPORT_SYMBOL_GPL(debugfs_create_str); static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct dentry *dentry = F_DENTRY(file); char *old, *new = NULL; int pos = *ppos; int r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; old = *(char **)file->private_data; /* only allow strict concatenation */ r = -EINVAL; if (pos && pos != strlen(old)) goto error; r = -E2BIG; if (pos + count + 1 > PAGE_SIZE) goto error; r = -ENOMEM; new = kmalloc(pos + count + 1, GFP_KERNEL); if (!new) goto error; if (pos) memcpy(new, old, pos); r = -EFAULT; if (copy_from_user(new + pos, user_buf, count)) goto error; new[pos + count] = '\0'; strim(new); rcu_assign_pointer(*(char __rcu **)file->private_data, new); synchronize_rcu(); kfree(old); debugfs_file_put(dentry); return count; error: kfree(new); debugfs_file_put(dentry); return r; } static const struct file_operations fops_str = { .read = debugfs_read_file_str, .write = debugfs_write_file_str, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_str_ro = { .read = debugfs_read_file_str, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_str_wo = { .write = debugfs_write_file_str, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_str - create a debugfs file that is used to read and write a string value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. */ void debugfs_create_str(const char *name, umode_t mode, struct dentry *parent, char **value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str, &fops_str_ro, &fops_str_wo); } static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct debugfs_blob_wrapper *blob = file->private_data; struct dentry *dentry = F_DENTRY(file); ssize_t r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; r = simple_read_from_buffer(user_buf, count, ppos, blob->data, blob->size); debugfs_file_put(dentry); return r; } static ssize_t write_file_blob(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct debugfs_blob_wrapper *blob = file->private_data; struct dentry *dentry = F_DENTRY(file); ssize_t r; r = debugfs_file_get(dentry); if (unlikely(r)) return r; r = simple_write_to_buffer(blob->data, blob->size, ppos, user_buf, count); debugfs_file_put(dentry); return r; } static const struct file_operations fops_blob = { .read = read_file_blob, .write = write_file_blob, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_blob - create a debugfs file that is used to read and write * a binary blob * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer * to the blob data and the size of the data. * * This function creates a file in debugfs with the given name that exports * @blob->data as a binary blob. If the @mode variable is so set it can be * read from and written to. * * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will * be returned. */ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) { return debugfs_create_file_unsafe(name, mode & 0644, parent, blob, &fops_blob); } EXPORT_SYMBOL_GPL(debugfs_create_blob); static size_t u32_format_array(char *buf, size_t bufsize, u32 *array, int array_size) { size_t ret = 0; while (--array_size >= 0) { size_t len; char term = array_size ? ' ' : '\n'; len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; buf += len; bufsize -= len; } return ret; } static int u32_array_open(struct inode *inode, struct file *file) { struct debugfs_u32_array *data = inode->i_private; int size, elements = data->n_elements; char *buf; /* * Max size: * - 10 digits + ' '/'\n' = 11 bytes per number * - terminating NUL character */ size = elements*11; buf = kmalloc(size+1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[size] = 0; file->private_data = buf; u32_format_array(buf, size, data->array, data->n_elements); return nonseekable_open(inode, file); } static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); } static int u32_array_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static const struct file_operations u32_array_fops = { .owner = THIS_MODULE, .open = u32_array_open, .release = u32_array_release, .read = u32_array_read, .llseek = no_llseek, }; /** * debugfs_create_u32_array - create a debugfs file that is used to read u32 * array. * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @array: wrapper struct containing data pointer and size of the array. * * This function creates a file in debugfs with the given name that exports * @array as data. If the @mode variable is so set it can be read from. * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. */ void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, struct debugfs_u32_array *array) { debugfs_create_file_unsafe(name, mode, parent, array, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); #ifdef CONFIG_HAS_IOMEM /* * The regset32 stuff is used to print 32-bit registers using the * seq_file utilities. We offer printing a register set in an already-opened * sequential file or create a debugfs file that only prints a regset32. */ /** * debugfs_print_regs32 - use seq_print to describe a set of registers * @s: the seq_file structure being used to generate output * @regs: an array if struct debugfs_reg32 structures * @nregs: the length of the above array * @base: the base address to be used in reading the registers * @prefix: a string to be prefixed to every output line * * This function outputs a text block describing the current values of * some 32-bit hardware registers. It is meant to be used within debugfs * files based on seq_file that need to show registers, intermixed with other * information. The prefix argument may be used to specify a leading string, * because some peripherals have several blocks of identical registers, * for example configuration of dma channels */ void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix) { int i; for (i = 0; i < nregs; i++, regs++) { if (prefix) seq_printf(s, "%s", prefix); seq_printf(s, "%s = 0x%08x\n", regs->name, readl(base + regs->offset)); if (seq_has_overflowed(s)) break; } } EXPORT_SYMBOL_GPL(debugfs_print_regs32); static int debugfs_regset32_show(struct seq_file *s, void *data) { struct debugfs_regset32 *regset = s->private; if (regset->dev) pm_runtime_get_sync(regset->dev); debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, ""); if (regset->dev) pm_runtime_put(regset->dev); return 0; } DEFINE_SHOW_ATTRIBUTE(debugfs_regset32); /** * debugfs_create_regset32 - create a debugfs file that returns register values * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @regset: a pointer to a struct debugfs_regset32, which contains a pointer * to an array of register definitions, the array size and the base * address where the register bank is to be found. * * This function creates a file in debugfs with the given name that reports * the names and values of a set of 32-bit registers. If the @mode variable * is so set it can be read from. Writing is not supported. */ void debugfs_create_regset32(const char *name, umode_t mode, struct dentry *parent, struct debugfs_regset32 *regset) { debugfs_create_file(name, mode, parent, regset, &debugfs_regset32_fops); } EXPORT_SYMBOL_GPL(debugfs_create_regset32); #endif /* CONFIG_HAS_IOMEM */ struct debugfs_devm_entry { int (*read)(struct seq_file *seq, void *data); struct device *dev; }; static int debugfs_devm_entry_open(struct inode *inode, struct file *f) { struct debugfs_devm_entry *entry = inode->i_private; return single_open(f, entry->read, entry->dev); } static const struct file_operations debugfs_devm_entry_ops = { .owner = THIS_MODULE, .open = debugfs_devm_entry_open, .release = single_release, .read = seq_read, .llseek = seq_lseek }; /** * debugfs_create_devm_seqfile - create a debugfs file that is bound to device. * * @dev: device related to this debugfs file. * @name: name of the debugfs file. * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @read_fn: function pointer called to print the seq_file content. */ void debugfs_create_devm_seqfile(struct device *dev, const char *name, struct dentry *parent, int (*read_fn)(struct seq_file *s, void *data)) { struct debugfs_devm_entry *entry; if (IS_ERR(parent)) return; entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL); if (!entry) return; entry->read = read_fn; entry->dev = dev; debugfs_create_file(name, S_IRUGO, parent, entry, &debugfs_devm_entry_ops); } EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
7 3 4 32 54 54 54 2 2 3 1 3 1 3 2 51 51 11 51 56 57 57 57 57 57 1 57 57 2 2 57 43 14 11 50 4 43 9 51 54 48 19 3 7 1 5 5 5 36 6 36 4 2 61 2 3 1 4 3 4 2 3 1 2 5 4 14 5 99 100 61 5 57 4 4 57 1 5 54 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 // SPDX-License-Identifier: GPL-2.0-or-later /* AF_RXRPC sendmsg() implementation. * * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/net.h> #include <linux/gfp.h> #include <linux/skbuff.h> #include <linux/export.h> #include <linux/sched/signal.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" /* * Propose an abort to be made in the I/O thread. */ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, enum rxrpc_abort_reason why) { _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); if (!call->send_abort && !rxrpc_call_is_complete(call)) { call->send_abort_why = why; call->send_abort_err = error; call->send_abort_seq = 0; /* Request abort locklessly vs rxrpc_input_call_event(). */ smp_store_release(&call->send_abort, abort_code); rxrpc_poke_call(call, rxrpc_call_poke_abort); return true; } return false; } /* * Wait for a call to become connected. Interruption here doesn't cause the * call to be aborted. */ static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) { DECLARE_WAITQUEUE(myself, current); int ret = 0; _enter("%d", call->debug_id); if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) goto no_wait; add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { switch (call->interruptibility) { case RXRPC_INTERRUPTIBLE: case RXRPC_PREINTERRUPTIBLE: set_current_state(TASK_INTERRUPTIBLE); break; case RXRPC_UNINTERRUPTIBLE: default: set_current_state(TASK_UNINTERRUPTIBLE); break; } if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) break; if ((call->interruptibility == RXRPC_INTERRUPTIBLE || call->interruptibility == RXRPC_PREINTERRUPTIBLE) && signal_pending(current)) { ret = sock_intr_errno(*timeo); break; } *timeo = schedule_timeout(*timeo); } remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); no_wait: if (ret == 0 && rxrpc_call_is_complete(call)) ret = call->error; _leave(" = %d", ret); return ret; } /* * Return true if there's sufficient Tx queue space. */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) { if (_tx_win) *_tx_win = call->tx_bottom; return call->tx_prepared - call->tx_bottom < 256; } /* * Wait for space to appear in the Tx queue or a signal to occur. */ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo) { for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (rxrpc_check_tx_space(call, NULL)) return 0; if (rxrpc_call_is_complete(call)) return call->error; if (signal_pending(current)) return sock_intr_errno(*timeo); trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } /* * Wait for space to appear in the Tx queue uninterruptibly, but with * a timeout of 2*RTT if no progress was made and a signal occurred. */ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; signed long rtt, timeout; rtt = READ_ONCE(call->peer->srtt_us) >> 3; rtt = usecs_to_jiffies(rtt) * 2; if (rtt < 2) rtt = 2; timeout = rtt; tx_start = smp_load_acquire(&call->acks_hard_ack); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (rxrpc_check_tx_space(call, &tx_win)) return 0; if (rxrpc_call_is_complete(call)) return call->error; if (timeout == 0 && tx_win == tx_start && signal_pending(current)) return -EINTR; if (tx_win != tx_start) { timeout = rtt; tx_start = tx_win; } trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); timeout = schedule_timeout(timeout); } } /* * Wait for space to appear in the Tx queue uninterruptibly. */ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo) { for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (rxrpc_check_tx_space(call, NULL)) return 0; if (rxrpc_call_is_complete(call)) return call->error; trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } } /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked */ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, struct rxrpc_call *call, long *timeo, bool waitall) { DECLARE_WAITQUEUE(myself, current); int ret; _enter(",{%u,%u,%u,%u}", call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); switch (call->interruptibility) { case RXRPC_INTERRUPTIBLE: if (waitall) ret = rxrpc_wait_for_tx_window_waitall(rx, call); else ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); break; case RXRPC_PREINTERRUPTIBLE: case RXRPC_UNINTERRUPTIBLE: default: ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo); break; } remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); _leave(" = %d", ret); return ret; } /* * Notify the owner of the call that the transmit phase is ended and the last * packet has been queued. */ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { if (notify_end_tx) notify_end_tx(&rx->sk, call, call->user_call_ID); } /* * Queue a DATA packet for transmission, set the resend timeout and send * the packet immediately. Returns the error from rxrpc_send_data_packet() * in case the caller wants to do something with it. */ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { rxrpc_seq_t seq = txb->seq; bool poke, last = txb->flags & RXRPC_LAST_PACKET; rxrpc_inc_stat(call->rxnet, stat_tx_data); ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. */ txb->last_sent = ktime_get_real(); if (last) trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); /* Add the packet to the call's output buffer */ spin_lock(&call->tx_lock); poke = list_empty(&call->tx_sendmsg); list_add_tail(&txb->call_link, &call->tx_sendmsg); call->tx_prepared = seq; if (last) rxrpc_notify_end_tx(rx, call, notify_end_tx); spin_unlock(&call->tx_lock); if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } /* * send data through a socket * - must be called in process context * - The caller holds the call user access mutex, but not the socket lock. */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx, bool *_dropped_lock) { struct rxrpc_txbuf *txb; struct sock *sk = &rx->sk; enum rxrpc_call_state state; long timeo; bool more = msg->msg_flags & MSG_MORE; int ret, copied = 0; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ret = rxrpc_wait_to_be_connected(call, &timeo); if (ret < 0) return ret; if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) { ret = rxrpc_init_client_conn_security(call->conn); if (ret < 0) return ret; } /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); reload: ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) goto maybe_error; state = rxrpc_call_state(call); ret = -ESHUTDOWN; if (state >= RXRPC_CALL_COMPLETE) goto maybe_error; ret = -EPROTO; if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && state != RXRPC_CALL_SERVER_ACK_REQUEST && state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Request phase complete for this client call */ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, call->cid, call->call_id, call->rx_consumed, 0, -EPROTO); goto maybe_error; } ret = -EMSGSIZE; if (call->tx_total_len != -1) { if (len - copied > call->tx_total_len) goto maybe_error; if (!more && len - copied != call->tx_total_len) goto maybe_error; } txb = call->tx_pending; call->tx_pending = NULL; if (txb) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more); do { if (!txb) { size_t remain; _debug("alloc"); if (!rxrpc_check_tx_space(call, NULL)) goto wait_for_space; /* Work out the maximum size of a packet. Assume that * the security header is going to be in the padded * region (enc blocksize), but the trailer is not. */ remain = more ? INT_MAX : msg_data_left(msg); txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation); if (!txb) { ret = -ENOMEM; goto maybe_error; } } _debug("append"); /* append next segment of data to the current buffer */ if (msg_data_left(msg) > 0) { size_t copy = min_t(size_t, txb->space, msg_data_left(msg)); _debug("add %zu", copy); if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, copy, &msg->msg_iter)) goto efault; _debug("added"); txb->space -= copy; txb->len += copy; txb->offset += copy; copied += copy; if (call->tx_total_len != -1) call->tx_total_len -= copy; } /* check for the far side aborting the call or a network error * occurring */ if (rxrpc_call_is_complete(call)) goto call_terminated; /* add the packet to the send queue if it's now full */ if (!txb->space || (msg_data_left(msg) == 0 && !more)) { if (msg_data_left(msg) == 0 && !more) txb->flags |= RXRPC_LAST_PACKET; else if (call->tx_top - call->acks_hard_ack < call->tx_winsize) txb->flags |= RXRPC_MORE_PACKETS; ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; txb->kvec[0].iov_len += txb->len; txb->len = txb->kvec[0].iov_len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } } while (msg_data_left(msg) > 0); success: ret = copied; if (rxrpc_call_is_complete(call) && call->error < 0) ret = call->error; out: call->tx_pending = txb; _leave(" = %d", ret); return ret; call_terminated: rxrpc_put_txbuf(txb, rxrpc_txbuf_put_send_aborted); _leave(" = %d", call->error); return call->error; maybe_error: if (copied) goto success; goto out; efault: ret = -EFAULT; goto out; wait_for_space: ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; mutex_unlock(&call->user_mutex); *_dropped_lock = true; ret = rxrpc_wait_for_tx_window(rx, call, &timeo, msg->msg_flags & MSG_WAITALL); if (ret < 0) goto maybe_error; if (call->interruptibility == RXRPC_INTERRUPTIBLE) { if (mutex_lock_interruptible(&call->user_mutex) < 0) { ret = sock_intr_errno(timeo); goto maybe_error; } } else { mutex_lock(&call->user_mutex); } *_dropped_lock = false; goto reload; } /* * extract control messages from the sendmsg() control buffer */ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) { struct cmsghdr *cmsg; bool got_user_ID = false; int len; if (msg->msg_controllen == 0) return -EINVAL; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; len = cmsg->cmsg_len - sizeof(struct cmsghdr); _debug("CMSG %d, %d, %d", cmsg->cmsg_level, cmsg->cmsg_type, len); if (cmsg->cmsg_level != SOL_RXRPC) continue; switch (cmsg->cmsg_type) { case RXRPC_USER_CALL_ID: if (msg->msg_flags & MSG_CMSG_COMPAT) { if (len != sizeof(u32)) return -EINVAL; p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); } else { if (len != sizeof(unsigned long)) return -EINVAL; p->call.user_call_ID = *(unsigned long *) CMSG_DATA(cmsg); } got_user_ID = true; break; case RXRPC_ABORT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; p->command = RXRPC_CMD_SEND_ABORT; if (len != sizeof(p->abort_code)) return -EINVAL; p->abort_code = *(unsigned int *)CMSG_DATA(cmsg); if (p->abort_code == 0) return -EINVAL; break; case RXRPC_CHARGE_ACCEPT: if (p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; p->command = RXRPC_CMD_CHARGE_ACCEPT; if (len != 0) return -EINVAL; break; case RXRPC_EXCLUSIVE_CALL: p->exclusive = true; if (len != 0) return -EINVAL; break; case RXRPC_UPGRADE_SERVICE: p->upgrade = true; if (len != 0) return -EINVAL; break; case RXRPC_TX_LENGTH: if (p->call.tx_total_len != -1 || len != sizeof(__s64)) return -EINVAL; p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); if (p->call.tx_total_len < 0) return -EINVAL; break; case RXRPC_SET_CALL_TIMEOUT: if (len & 3 || len < 4 || len > 12) return -EINVAL; memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); p->call.nr_timeouts = len / 4; if (p->call.timeouts.hard > INT_MAX / HZ) return -ERANGE; if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) return -ERANGE; if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) return -ERANGE; break; default: return -EINVAL; } } if (!got_user_ID) return -EINVAL; if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) return -EINVAL; _leave(" = 0"); return 0; } /* * Create a new client call for sendmsg(). * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, struct rxrpc_send_params *p) __releases(&rx->sk.sk_lock.slock) __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; struct rxrpc_peer *peer; struct rxrpc_call *call; struct key *key; DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); _enter(""); if (!msg->msg_name) { release_sock(&rx->sk); return ERR_PTR(-EDESTADDRREQ); } peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL); if (!peer) { release_sock(&rx->sk); return ERR_PTR(-ENOMEM); } key = rx->key; if (key && !rx->key->payload.data[0]) key = NULL; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.peer = peer; cp.key = rx->key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ rxrpc_put_peer(peer, rxrpc_peer_put_application); _leave(" = %p\n", call); return call; } /* * send a message forming part of a client call through an RxRPC socket * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; bool dropped_lock = false; int ret; struct rxrpc_send_params p = { .call.tx_total_len = -1, .call.user_call_ID = 0, .call.nr_timeouts = 0, .call.interruptibility = RXRPC_INTERRUPTIBLE, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, .upgrade = false, }; _enter(""); ret = rxrpc_sendmsg_cmsg(msg, &p); if (ret < 0) goto error_release_sock; if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) goto error_release_sock; ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); goto error_release_sock; } call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); if (!call) { ret = -EBADSLT; if (p.command != RXRPC_CMD_SEND_DATA) goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p); /* The socket is now unlocked... */ if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ p.call.nr_timeouts = 0; ret = 0; if (rxrpc_call_is_complete(call)) goto out_put_unlock; } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_SECURING: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_SERVER_PREALLOC: rxrpc_put_call(call, rxrpc_call_put_sendmsg); ret = -EBUSY; goto error_release_sock; default: break; } ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); if (ret < 0) { ret = -ERESTARTSYS; goto error_put; } if (p.call.tx_total_len != -1) { ret = -EINVAL; if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) goto out_put_unlock; call->tx_total_len = p.call.tx_total_len; } } switch (p.call.nr_timeouts) { case 3: WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal); fallthrough; case 2: WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle); fallthrough; case 1: if (p.call.timeouts.hard > 0) { ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC); WRITE_ONCE(call->expect_term_by, ktime_add(p.call.timeouts.hard, ktime_get_real())); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); rxrpc_poke_call(call, rxrpc_call_poke_set_timeout); } break; } if (rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, rxrpc_abort_call_sendmsg); ret = 0; } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else { ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); } out_put_unlock: if (!dropped_lock) mutex_unlock(&call->user_mutex); error_put: rxrpc_put_call(call, rxrpc_call_put_sendmsg); _leave(" = %d", ret); return ret; error_release_sock: release_sock(&rx->sk); return ret; } /** * rxrpc_kernel_send_data - Allow a kernel service to send data on a call * @sock: The socket the call is on * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send * @notify_end_tx: Notification that the last packet is queued. * * Allow a kernel service to send data on a call. The call must be in an state * appropriate to sending data. No control data should be supplied in @msg, * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. */ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, size_t len, rxrpc_notify_end_tx_t notify_end_tx) { bool dropped_lock = false; int ret; _enter("{%d},", call->debug_id); ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); mutex_lock(&call->user_mutex); ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, notify_end_tx, &dropped_lock); if (ret == -ESHUTDOWN) ret = call->error; if (!dropped_lock) mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } EXPORT_SYMBOL(rxrpc_kernel_send_data); /** * rxrpc_kernel_abort_call - Allow a kernel service to abort a call * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * @error: Local error value * @why: Indication as to why. * * Allow a kernel service to abort a call, if it's still in an abortable state * and return true if the call was aborted, false if it was already complete. */ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, enum rxrpc_abort_reason why) { bool aborted; _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); aborted = rxrpc_propose_abort(call, abort_code, error, why); mutex_unlock(&call->user_mutex); return aborted; } EXPORT_SYMBOL(rxrpc_kernel_abort_call); /** * rxrpc_kernel_set_tx_length - Set the total Tx length on a call * @sock: The socket the call is on * @call: The call to be informed * @tx_total_len: The amount of data to be transmitted for this call * * Allow a kernel service to set the total transmit length on a call. This * allows buffer-to-packet encrypt-and-copy to be performed. * * This function is primarily for use for setting the reply length since the * request length can be set when beginning the call. */ void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call, s64 tx_total_len) { WARN_ON(call->tx_total_len != -1); call->tx_total_len = tx_total_len; } EXPORT_SYMBOL(rxrpc_kernel_set_tx_length);
10 10 4 4 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 // SPDX-License-Identifier: GPL-2.0 #include "messages.h" #include "ctree.h" #include "fs.h" #include "accessors.h" void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; disk_super = fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (!(features & flag)) { spin_lock(&fs_info->super_lock); features = btrfs_super_incompat_flags(disk_super); if (!(features & flag)) { features |= flag; btrfs_set_super_incompat_flags(disk_super, features); btrfs_info(fs_info, "setting incompat feature flag for %s (0x%llx)", name, flag); } spin_unlock(&fs_info->super_lock); set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags); } } void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; disk_super = fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (features & flag) { spin_lock(&fs_info->super_lock); features = btrfs_super_incompat_flags(disk_super); if (features & flag) { features &= ~flag; btrfs_set_super_incompat_flags(disk_super, features); btrfs_info(fs_info, "clearing incompat feature flag for %s (0x%llx)", name, flag); } spin_unlock(&fs_info->super_lock); set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags); } } void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; disk_super = fs_info->super_copy; features = btrfs_super_compat_ro_flags(disk_super); if (!(features & flag)) { spin_lock(&fs_info->super_lock); features = btrfs_super_compat_ro_flags(disk_super); if (!(features & flag)) { features |= flag; btrfs_set_super_compat_ro_flags(disk_super, features); btrfs_info(fs_info, "setting compat-ro feature flag for %s (0x%llx)", name, flag); } spin_unlock(&fs_info->super_lock); set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags); } } void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; disk_super = fs_info->super_copy; features = btrfs_super_compat_ro_flags(disk_super); if (features & flag) { spin_lock(&fs_info->super_lock); features = btrfs_super_compat_ro_flags(disk_super); if (features & flag) { features &= ~flag; btrfs_set_super_compat_ro_flags(disk_super, features); btrfs_info(fs_info, "clearing compat-ro feature flag for %s (0x%llx)", name, flag); } spin_unlock(&fs_info->super_lock); set_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags); } }
19 12 13 1 4 11 10 10 8 11 4 8 4 1 3 3 8 8 9 5 10 20 13 17 4 13 12 1 19 4 3 8 3 6 1 12 7 4 15 7 4 17 10 13 4 10 8 2 2 14 7 9 11 38 38 1 37 37 28 4 13 12 12 1 2 13 1 2 29 7 2 4 14 18 18 13 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 // SPDX-License-Identifier: GPL-2.0 #include <linux/fsverity.h> #include <linux/iomap.h> #include "ctree.h" #include "delalloc-space.h" #include "direct-io.h" #include "extent-tree.h" #include "file.h" #include "fs.h" #include "transaction.h" #include "volumes.h" struct btrfs_dio_data { ssize_t submitted; struct extent_changeset *data_reserved; struct btrfs_ordered_extent *ordered; bool data_space_reserved; bool nocow_done; }; struct btrfs_dio_private { /* Range of I/O */ u64 file_offset; u32 bytes; /* This must be last */ struct btrfs_bio bbio; }; static struct bio_set btrfs_dio_bioset; static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, struct extent_state **cached_state, unsigned int iomap_flags) { const bool writing = (iomap_flags & IOMAP_WRITE); const bool nowait = (iomap_flags & IOMAP_NOWAIT); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; int ret = 0; while (1) { if (nowait) { if (!try_lock_extent(io_tree, lockstart, lockend, cached_state)) return -EAGAIN; } else { lock_extent(io_tree, lockstart, lockend, cached_state); } /* * We're concerned with the entire range that we're going to be * doing DIO to, so we need to make sure there's no ordered * extents in this range. */ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, lockend - lockstart + 1); /* * We need to make sure there are no buffered pages in this * range either, we could have raced between the invalidate in * generic_file_direct_write and locking the extent. The * invalidate needs to happen so that reads after a write do not * get stale data. */ if (!ordered && (!writing || !filemap_range_has_page(inode->i_mapping, lockstart, lockend))) break; unlock_extent(io_tree, lockstart, lockend, cached_state); if (ordered) { if (nowait) { btrfs_put_ordered_extent(ordered); ret = -EAGAIN; break; } /* * If we are doing a DIO read and the ordered extent we * found is for a buffered write, we can not wait for it * to complete and retry, because if we do so we can * deadlock with concurrent buffered writes on page * locks. This happens only if our DIO read covers more * than one extent map, if at this point has already * created an ordered extent for a previous extent map * and locked its range in the inode's io tree, and a * concurrent write against that previous extent map's * range and this range started (we unlock the ranges * in the io tree only when the bios complete and * buffered writes always lock pages before attempting * to lock range in the io tree). */ if (writing || test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) btrfs_start_ordered_extent(ordered); else ret = nowait ? -EAGAIN : -ENOTBLK; btrfs_put_ordered_extent(ordered); } else { /* * We could trigger writeback for this range (and wait * for it to complete) and then invalidate the pages for * this range (through invalidate_inode_pages2_range()), * but that can lead us to a deadlock with a concurrent * call to readahead (a buffered read or a defrag call * triggered a readahead) on a page lock due to an * ordered dio extent we created before but did not have * yet a corresponding bio submitted (whence it can not * complete), which makes readahead wait for that * ordered extent to complete while holding a lock on * that page. */ ret = nowait ? -EAGAIN : -ENOTBLK; } if (ret) break; cond_resched(); } return ret; } static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, struct btrfs_dio_data *dio_data, const u64 start, const struct btrfs_file_extent *file_extent, const int type) { struct extent_map *em = NULL; struct btrfs_ordered_extent *ordered; if (type != BTRFS_ORDERED_NOCOW) { em = btrfs_create_io_em(inode, start, file_extent, type); if (IS_ERR(em)) goto out; } ordered = btrfs_alloc_ordered_extent(inode, start, file_extent, (1 << type) | (1 << BTRFS_ORDERED_DIRECT)); if (IS_ERR(ordered)) { if (em) { free_extent_map(em); btrfs_drop_extent_map_range(inode, start, start + file_extent->num_bytes - 1, false); } em = ERR_CAST(ordered); } else { ASSERT(!dio_data->ordered); dio_data->ordered = ordered; } out: return em; } static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, struct btrfs_dio_data *dio_data, u64 start, u64 len) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_file_extent file_extent; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; int ret; alloc_hint = btrfs_get_extent_allocation_hint(inode, start, len); again: ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret == -EAGAIN) { ASSERT(btrfs_is_zoned(fs_info)); wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, TASK_UNINTERRUPTIBLE); goto again; } if (ret) return ERR_PTR(ret); file_extent.disk_bytenr = ins.objectid; file_extent.disk_num_bytes = ins.offset; file_extent.num_bytes = ins.offset; file_extent.ram_bytes = ins.offset; file_extent.offset = 0; file_extent.compression = BTRFS_COMPRESS_NONE; em = btrfs_create_dio_extent(inode, dio_data, start, &file_extent, BTRFS_ORDERED_REGULAR); btrfs_dec_block_group_reservations(fs_info, ins.objectid); if (IS_ERR(em)) btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); return em; } static int btrfs_get_blocks_direct_write(struct extent_map **map, struct inode *inode, struct btrfs_dio_data *dio_data, u64 start, u64 *lenp, unsigned int iomap_flags) { const bool nowait = (iomap_flags & IOMAP_NOWAIT); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_file_extent file_extent; struct extent_map *em = *map; int type; u64 block_start; struct btrfs_block_group *bg; bool can_nocow = false; bool space_reserved = false; u64 len = *lenp; u64 prev_len; int ret = 0; /* * We don't allocate a new extent in the following cases * * 1) The inode is marked as NODATACOW. In this case we'll just use the * existing extent. * 2) The extent is marked as PREALLOC. We're good to go here and can * just use the extent. * */ if ((em->flags & EXTENT_FLAG_PREALLOC) || ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->disk_bytenr != EXTENT_MAP_HOLE)) { if (em->flags & EXTENT_FLAG_PREALLOC) type = BTRFS_ORDERED_PREALLOC; else type = BTRFS_ORDERED_NOCOW; len = min(len, em->len - (start - em->start)); block_start = extent_map_block_start(em) + (start - em->start); if (can_nocow_extent(inode, start, &len, &file_extent, false, false) == 1) { bg = btrfs_inc_nocow_writers(fs_info, block_start); if (bg) can_nocow = true; } } prev_len = len; if (can_nocow) { struct extent_map *em2; /* We can NOCOW, so only need to reserve metadata space. */ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len, nowait); if (ret < 0) { /* Our caller expects us to free the input extent map. */ free_extent_map(em); *map = NULL; btrfs_dec_nocow_writers(bg); if (nowait && (ret == -ENOSPC || ret == -EDQUOT)) ret = -EAGAIN; goto out; } space_reserved = true; em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, &file_extent, type); btrfs_dec_nocow_writers(bg); if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); *map = em2; em = em2; } if (IS_ERR(em2)) { ret = PTR_ERR(em2); goto out; } dio_data->nocow_done = true; } else { /* Our caller expects us to free the input extent map. */ free_extent_map(em); *map = NULL; if (nowait) { ret = -EAGAIN; goto out; } /* * If we could not allocate data space before locking the file * range and we can't do a NOCOW write, then we have to fail. */ if (!dio_data->data_space_reserved) { ret = -ENOSPC; goto out; } /* * We have to COW and we have already reserved data space before, * so now we reserve only metadata. */ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len, false); if (ret < 0) goto out; space_reserved = true; em = btrfs_new_extent_direct(BTRFS_I(inode), dio_data, start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; } *map = em; len = min(len, em->len - (start - em->start)); if (len < prev_len) btrfs_delalloc_release_metadata(BTRFS_I(inode), prev_len - len, true); } /* * We have created our ordered extent, so we can now release our reservation * for an outstanding extent. */ btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len); /* * Need to update the i_size under the extent lock so buffered * readers will get the updated i_size when we unlock. */ if (start + len > i_size_read(inode)) i_size_write(inode, start + len); out: if (ret && space_reserved) { btrfs_delalloc_release_extents(BTRFS_I(inode), len); btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true); } *lenp = len; return ret; } static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct extent_map *em; struct extent_state *cached_state = NULL; struct btrfs_dio_data *dio_data = iter->private; u64 lockstart, lockend; const bool write = !!(flags & IOMAP_WRITE); int ret = 0; u64 len = length; const u64 data_alloc_len = length; bool unlock_extents = false; /* * We could potentially fault if we have a buffer > PAGE_SIZE, and if * we're NOWAIT we may submit a bio for a partial range and return * EIOCBQUEUED, which would result in an errant short read. * * The best way to handle this would be to allow for partial completions * of iocb's, so we could submit the partial bio, return and fault in * the rest of the pages, and then submit the io for the rest of the * range. However we don't have that currently, so simply return * -EAGAIN at this point so that the normal path is used. */ if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE) return -EAGAIN; /* * Cap the size of reads to that usually seen in buffered I/O as we need * to allocate a contiguous array for the checksums. */ if (!write) len = min_t(u64, len, fs_info->sectorsize * BTRFS_MAX_BIO_SECTORS); lockstart = start; lockend = start + len - 1; /* * iomap_dio_rw() only does filemap_write_and_wait_range(), which isn't * enough if we've written compressed pages to this area, so we need to * flush the dirty pages again to make absolutely sure that any * outstanding dirty pages are on disk - the first flush only starts * compression on the data, while keeping the pages locked, so by the * time the second flush returns we know bios for the compressed pages * were submitted and finished, and the pages no longer under writeback. * * If we have a NOWAIT request and we have any pages in the range that * are locked, likely due to compression still in progress, we don't want * to block on page locks. We also don't want to block on pages marked as * dirty or under writeback (same as for the non-compression case). * iomap_dio_rw() did the same check, but after that and before we got * here, mmap'ed writes may have happened or buffered reads started * (readpage() and readahead(), which lock pages), as we haven't locked * the file range yet. */ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) { if (flags & IOMAP_NOWAIT) { if (filemap_range_needs_writeback(inode->i_mapping, lockstart, lockend)) return -EAGAIN; } else { ret = filemap_fdatawrite_range(inode->i_mapping, start, start + length - 1); if (ret) return ret; } } memset(dio_data, 0, sizeof(*dio_data)); /* * We always try to allocate data space and must do it before locking * the file range, to avoid deadlocks with concurrent writes to the same * range if the range has several extents and the writes don't expand the * current i_size (the inode lock is taken in shared mode). If we fail to * allocate data space here we continue and later, after locking the * file range, we fail with ENOSPC only if we figure out we can not do a * NOCOW write. */ if (write && !(flags & IOMAP_NOWAIT)) { ret = btrfs_check_data_free_space(BTRFS_I(inode), &dio_data->data_reserved, start, data_alloc_len, false); if (!ret) dio_data->data_space_reserved = true; else if (ret && !(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) goto err; } /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered IO, or we are doing a * NOWAIT read/write and we need to block. */ ret = lock_extent_direct(inode, lockstart, lockend, &cached_state, flags); if (ret < 0) goto err; em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); goto unlock_err; } /* * Ok for INLINE and COMPRESSED extents we need to fallback on buffered * io. INLINE is special, and we could probably kludge it in here, but * it's still buffered so for safety lets just fall back to the generic * buffered path. * * For COMPRESSED we _have_ to read the entire extent in so we can * decompress it, so there will be buffering required no matter what we * do, so go ahead and fallback to buffered. * * We return -ENOTBLK because that's what makes DIO go ahead and go back * to buffered IO. Don't blame me, this is the price we pay for using * the generic code. */ if (extent_map_is_compressed(em) || em->disk_bytenr == EXTENT_MAP_INLINE) { free_extent_map(em); /* * If we are in a NOWAIT context, return -EAGAIN in order to * fallback to buffered IO. This is not only because we can * block with buffered IO (no support for NOWAIT semantics at * the moment) but also to avoid returning short reads to user * space - this happens if we were able to read some data from * previous non-compressed extents and then when we fallback to * buffered IO, at btrfs_file_read_iter() by calling * filemap_read(), we fail to fault in pages for the read buffer, * in which case filemap_read() returns a short read (the number * of bytes previously read is > 0, so it does not return -EFAULT). */ ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; goto unlock_err; } len = min(len, em->len - (start - em->start)); /* * If we have a NOWAIT request and the range contains multiple extents * (or a mix of extents and holes), then we return -EAGAIN to make the * caller fallback to a context where it can do a blocking (without * NOWAIT) request. This way we avoid doing partial IO and returning * success to the caller, which is not optimal for writes and for reads * it can result in unexpected behaviour for an application. * * When doing a read, because we use IOMAP_DIO_PARTIAL when calling * iomap_dio_rw(), we can end up returning less data then what the caller * asked for, resulting in an unexpected, and incorrect, short read. * That is, the caller asked to read N bytes and we return less than that, * which is wrong unless we are crossing EOF. This happens if we get a * page fault error when trying to fault in pages for the buffer that is * associated to the struct iov_iter passed to iomap_dio_rw(), and we * have previously submitted bios for other extents in the range, in * which case iomap_dio_rw() may return us EIOCBQUEUED if not all of * those bios have completed by the time we get the page fault error, * which we return back to our caller - we should only return EIOCBQUEUED * after we have submitted bios for all the extents in the range. */ if ((flags & IOMAP_NOWAIT) && len < length) { free_extent_map(em); ret = -EAGAIN; goto unlock_err; } if (write) { ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, start, &len, flags); if (ret < 0) goto unlock_err; unlock_extents = true; /* Recalc len in case the new em is smaller than requested */ len = min(len, em->len - (start - em->start)); if (dio_data->data_space_reserved) { u64 release_offset; u64 release_len = 0; if (dio_data->nocow_done) { release_offset = start; release_len = data_alloc_len; } else if (len < data_alloc_len) { release_offset = start + len; release_len = data_alloc_len - len; } if (release_len > 0) btrfs_free_reserved_data_space(BTRFS_I(inode), dio_data->data_reserved, release_offset, release_len); } } else { /* * We need to unlock only the end area that we aren't using. * The rest is going to be unlocked by the endio routine. */ lockstart = start + len; if (lockstart < lockend) unlock_extents = true; } if (unlock_extents) unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); else free_extent_state(cached_state); /* * Translate extent map information to iomap. * We trim the extents (and move the addr) even though iomap code does * that, since we have locked only the parts we are performing I/O in. */ if ((em->disk_bytenr == EXTENT_MAP_HOLE) || ((em->flags & EXTENT_FLAG_PREALLOC) && !write)) { iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_HOLE; } else { iomap->addr = extent_map_block_start(em) + (start - em->start); iomap->type = IOMAP_MAPPED; } iomap->offset = start; iomap->bdev = fs_info->fs_devices->latest_dev->bdev; iomap->length = len; free_extent_map(em); return 0; unlock_err: unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); err: if (dio_data->data_space_reserved) { btrfs_free_reserved_data_space(BTRFS_I(inode), dio_data->data_reserved, start, data_alloc_len); extent_changeset_free(dio_data->data_reserved); } return ret; } static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap); struct btrfs_dio_data *dio_data = iter->private; size_t submitted = dio_data->submitted; const bool write = !!(flags & IOMAP_WRITE); int ret = 0; if (!write && (iomap->type == IOMAP_HOLE)) { /* If reading from a hole, unlock and return */ unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1, NULL); return 0; } if (submitted < length) { pos += submitted; length -= submitted; if (write) btrfs_finish_ordered_extent(dio_data->ordered, NULL, pos, length, false); else unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1, NULL); ret = -ENOTBLK; } if (write) { btrfs_put_ordered_extent(dio_data->ordered); dio_data->ordered = NULL; } if (write) extent_changeset_free(dio_data->data_reserved); return ret; } static void btrfs_dio_end_io(struct btrfs_bio *bbio) { struct btrfs_dio_private *dip = container_of(bbio, struct btrfs_dio_private, bbio); struct btrfs_inode *inode = bbio->inode; struct bio *bio = &bbio->bio; if (bio->bi_status) { btrfs_warn(inode->root->fs_info, "direct IO failed ino %llu op 0x%0x offset %#llx len %u err no %d", btrfs_ino(inode), bio->bi_opf, dip->file_offset, dip->bytes, bio->bi_status); } if (btrfs_op(bio) == BTRFS_MAP_WRITE) { btrfs_finish_ordered_extent(bbio->ordered, NULL, dip->file_offset, dip->bytes, !bio->bi_status); } else { unlock_extent(&inode->io_tree, dip->file_offset, dip->file_offset + dip->bytes - 1, NULL); } bbio->bio.bi_private = bbio->private; iomap_dio_bio_end_io(bio); } static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, struct btrfs_ordered_extent *ordered) { u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; u64 len = bbio->bio.bi_iter.bi_size; struct btrfs_ordered_extent *new; int ret; /* Must always be called for the beginning of an ordered extent. */ if (WARN_ON_ONCE(start != ordered->disk_bytenr)) return -EINVAL; /* No need to split if the ordered extent covers the entire bio. */ if (ordered->disk_num_bytes == len) { refcount_inc(&ordered->refs); bbio->ordered = ordered; return 0; } /* * Don't split the extent_map for NOCOW extents, as we're writing into * a pre-existing one. */ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { ret = split_extent_map(bbio->inode, bbio->file_offset, ordered->num_bytes, len, ordered->disk_bytenr); if (ret) return ret; } new = btrfs_split_ordered_extent(ordered, len); if (IS_ERR(new)) return PTR_ERR(new); bbio->ordered = new; return 0; } static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio, loff_t file_offset) { struct btrfs_bio *bbio = btrfs_bio(bio); struct btrfs_dio_private *dip = container_of(bbio, struct btrfs_dio_private, bbio); struct btrfs_dio_data *dio_data = iter->private; btrfs_bio_init(bbio, BTRFS_I(iter->inode)->root->fs_info, btrfs_dio_end_io, bio->bi_private); bbio->inode = BTRFS_I(iter->inode); bbio->file_offset = file_offset; dip->file_offset = file_offset; dip->bytes = bio->bi_iter.bi_size; dio_data->submitted += bio->bi_iter.bi_size; /* * Check if we are doing a partial write. If we are, we need to split * the ordered extent to match the submitted bio. Hang on to the * remaining unfinishable ordered_extent in dio_data so that it can be * cancelled in iomap_end to avoid a deadlock wherein faulting the * remaining pages is blocked on the outstanding ordered extent. */ if (iter->flags & IOMAP_WRITE) { int ret; ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered); if (ret) { btrfs_finish_ordered_extent(dio_data->ordered, NULL, file_offset, dip->bytes, !ret); bio->bi_status = errno_to_blk_status(ret); iomap_dio_bio_end_io(bio); return; } } btrfs_submit_bio(bbio, 0); } static const struct iomap_ops btrfs_dio_iomap_ops = { .iomap_begin = btrfs_dio_iomap_begin, .iomap_end = btrfs_dio_iomap_end, }; static const struct iomap_dio_ops btrfs_dio_ops = { .submit_io = btrfs_dio_submit_io, .bio_set = &btrfs_dio_bioset, }; static ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { struct btrfs_dio_data data = { 0 }; return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, IOMAP_DIO_PARTIAL, &data, done_before); } static struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { struct btrfs_dio_data data = { 0 }; return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, IOMAP_DIO_PARTIAL, &data, done_before); } static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, const struct iov_iter *iter, loff_t offset) { const u32 blocksize_mask = fs_info->sectorsize - 1; if (offset & blocksize_mask) return -EINVAL; if (iov_iter_alignment(iter) & blocksize_mask) return -EINVAL; return 0; } ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); loff_t pos; ssize_t written = 0; ssize_t written_buffered; size_t prev_left = 0; loff_t endbyte; ssize_t ret; unsigned int ilock_flags = 0; struct iomap_dio *dio; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; /* * If the write DIO is within EOF, use a shared lock and also only if * security bits will likely not be dropped by file_remove_privs() called * from btrfs_write_check(). Either will need to be rechecked after the * lock was acquired. */ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags); if (ret < 0) return ret; /* Shared lock cannot be used with security bits set. */ if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); ilock_flags &= ~BTRFS_ILOCK_SHARED; goto relock; } ret = generic_write_checks(iocb, from); if (ret <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); return ret; } ret = btrfs_write_check(iocb, from, ret); if (ret < 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); goto out; } pos = iocb->ki_pos; /* * Re-check since file size may have changed just before taking the * lock or pos may have changed because of O_APPEND in generic_write_check() */ if ((ilock_flags & BTRFS_ILOCK_SHARED) && pos + iov_iter_count(from) > i_size_read(inode)) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); ilock_flags &= ~BTRFS_ILOCK_SHARED; goto relock; } if (check_direct_IO(fs_info, from, pos)) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); goto buffered; } /* * The iov_iter can be mapped to the same file range we are writing to. * If that's the case, then we will deadlock in the iomap code, because * it first calls our callback btrfs_dio_iomap_begin(), which will create * an ordered extent, and after that it will fault in the pages that the * iov_iter refers to. During the fault in we end up in the readahead * pages code (starting at btrfs_readahead()), which will lock the range, * find that ordered extent and then wait for it to complete (at * btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since * obviously the ordered extent can never complete as we didn't submit * yet the respective bio(s). This always happens when the buffer is * memory mapped to the same file range, since the iomap DIO code always * invalidates pages in the target file range (after starting and waiting * for any writeback). * * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); } else { struct btrfs_file_private stack_private = { 0 }; struct btrfs_file_private *private; const bool have_private = (file->private_data != NULL); if (!have_private) file->private_data = &stack_private; /* * If we have a synchronous write, we must make sure the fsync * triggered by the iomap_dio_complete() call below doesn't * deadlock on the inode lock - we are already holding it and we * can't call it after unlocking because we may need to complete * partial writes due to the input buffer (or parts of it) not * being already faulted in. */ private = file->private_data; private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); private->fsync_skip_inode_lock = false; if (!have_private) file->private_data = NULL; } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) written = ret; if (iov_iter_count(from) > 0 && (ret == -EFAULT || ret > 0)) { const size_t left = iov_iter_count(from); /* * We have more data left to write. Try to fault in as many as * possible of the remainder pages and retry. We do this without * releasing and locking again the inode, to prevent races with * truncate. * * Also, in case the iov refers to pages in the file range of the * file we want to write to (due to a mmap), we could enter an * infinite loop if we retry after faulting the pages in, since * iomap will invalidate any pages in the range early on, before * it tries to fault in the pages of the iov. So we keep track of * how much was left of iov in the previous EFAULT and fallback * to buffered IO in case we haven't made any progress. */ if (left == prev_left) { ret = -ENOTBLK; } else { fault_in_iov_iter_readable(from, left); prev_left = left; goto again; } } btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. */ if ((ret < 0 && ret != -ENOTBLK) || !iov_iter_count(from)) goto out; buffered: /* * If we are in a NOWAIT context, then return -EAGAIN to signal the caller * it must retry the operation in a context where blocking is acceptable, * because even if we end up not blocking during the buffered IO attempt * below, we will block when flushing and waiting for the IO. */ if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; } pos = iocb->ki_pos; written_buffered = btrfs_buffered_write(iocb, from); if (written_buffered < 0) { ret = written_buffered; goto out; } /* * Ensure all data is persisted. We want the next direct IO read to be * able to read what was just written. */ endbyte = pos + written_buffered - 1; ret = btrfs_fdatawrite_range(BTRFS_I(inode), pos, endbyte); if (ret) goto out; ret = filemap_fdatawait_range(inode->i_mapping, pos, endbyte); if (ret) goto out; written += written_buffered; iocb->ki_pos = pos + written_buffered; invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT, endbyte >> PAGE_SHIFT); out: return ret < 0 ? ret : written; } static int check_direct_read(struct btrfs_fs_info *fs_info, const struct iov_iter *iter, loff_t offset) { int ret; int i, seg; ret = check_direct_IO(fs_info, iter, offset); if (ret < 0) return ret; if (!iter_is_iovec(iter)) return 0; for (seg = 0; seg < iter->nr_segs; seg++) { for (i = seg + 1; i < iter->nr_segs; i++) { const struct iovec *iov1 = iter_iov(iter) + seg; const struct iovec *iov2 = iter_iov(iter) + i; if (iov1->iov_base == iov2->iov_base) return -EINVAL; } } return 0; } ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); size_t prev_left = 0; ssize_t read = 0; ssize_t ret; if (fsverity_active(inode)) return 0; if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos)) return 0; btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); again: /* * This is similar to what we do for direct IO writes, see the comment * at btrfs_direct_write(), but we also disable page faults in addition * to disabling them only at the iov_iter level. This is because when * reading from a hole or prealloc extent, iomap calls iov_iter_zero(), * which can still trigger page fault ins despite having set ->nofault * to true of our 'to' iov_iter. * * The difference to direct IO writes is that we deadlock when trying * to lock the extent range in the inode's tree during he page reads * triggered by the fault in (while for writes it is due to waiting for * our own ordered extent). This is because for direct IO reads, * btrfs_dio_iomap_begin() returns with the extent range locked, which * is only unlocked in the endio callback (end_bio_extent_readpage()). */ pagefault_disable(); to->nofault = true; ret = btrfs_dio_read(iocb, to, read); to->nofault = false; pagefault_enable(); /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) read = ret; if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) { const size_t left = iov_iter_count(to); if (left == prev_left) { /* * We didn't make any progress since the last attempt, * fallback to a buffered read for the remainder of the * range. This is just to avoid any possibility of looping * for too long. */ ret = read; } else { /* * We made some progress since the last retry or this is * the first time we are retrying. Fault in as many pages * as possible and retry. */ fault_in_iov_iter_writeable(to, left); prev_left = left; goto again; } } btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); return ret < 0 ? ret : read; } int __init btrfs_init_dio(void) { if (bioset_init(&btrfs_dio_bioset, BIO_POOL_SIZE, offsetof(struct btrfs_dio_private, bbio.bio), BIOSET_NEED_BVECS)) return -ENOMEM; return 0; } void __cold btrfs_destroy_dio(void) { bioset_exit(&btrfs_dio_bioset); }
3 3 3 3 3 3 3 2 1 1 2 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 // SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for NXP PN533 NFC Chip - core functions * * Copyright (C) 2011 Instituto Nokia de Tecnologia * Copyright (C) 2012-2013 Tieto Poland */ #include <linux/device.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/nfc.h> #include <linux/netdevice.h> #include <net/nfc/nfc.h> #include "pn533.h" #define VERSION "0.3" /* How much time we spend listening for initiators */ #define PN533_LISTEN_TIME 2 /* Delay between each poll frame (ms) */ #define PN533_POLL_INTERVAL 10 /* structs for pn533 commands */ /* PN533_CMD_GET_FIRMWARE_VERSION */ struct pn533_fw_version { u8 ic; u8 ver; u8 rev; u8 support; }; /* PN533_CMD_RF_CONFIGURATION */ #define PN533_CFGITEM_RF_FIELD 0x01 #define PN533_CFGITEM_TIMING 0x02 #define PN533_CFGITEM_MAX_RETRIES 0x05 #define PN533_CFGITEM_PASORI 0x82 #define PN533_CFGITEM_RF_FIELD_AUTO_RFCA 0x2 #define PN533_CFGITEM_RF_FIELD_ON 0x1 #define PN533_CFGITEM_RF_FIELD_OFF 0x0 #define PN533_CONFIG_TIMING_102 0xb #define PN533_CONFIG_TIMING_204 0xc #define PN533_CONFIG_TIMING_409 0xd #define PN533_CONFIG_TIMING_819 0xe #define PN533_CONFIG_MAX_RETRIES_NO_RETRY 0x00 #define PN533_CONFIG_MAX_RETRIES_ENDLESS 0xFF struct pn533_config_max_retries { u8 mx_rty_atr; u8 mx_rty_psl; u8 mx_rty_passive_act; } __packed; struct pn533_config_timing { u8 rfu; u8 atr_res_timeout; u8 dep_timeout; } __packed; /* PN533_CMD_IN_LIST_PASSIVE_TARGET */ /* felica commands opcode */ #define PN533_FELICA_OPC_SENSF_REQ 0 #define PN533_FELICA_OPC_SENSF_RES 1 /* felica SENSF_REQ parameters */ #define PN533_FELICA_SENSF_SC_ALL 0xFFFF #define PN533_FELICA_SENSF_RC_NO_SYSTEM_CODE 0 #define PN533_FELICA_SENSF_RC_SYSTEM_CODE 1 #define PN533_FELICA_SENSF_RC_ADVANCED_PROTOCOL 2 /* type B initiator_data values */ #define PN533_TYPE_B_AFI_ALL_FAMILIES 0 #define PN533_TYPE_B_POLL_METHOD_TIMESLOT 0 #define PN533_TYPE_B_POLL_METHOD_PROBABILISTIC 1 union pn533_cmd_poll_initdata { struct { u8 afi; u8 polling_method; } __packed type_b; struct { u8 opcode; __be16 sc; u8 rc; u8 tsn; } __packed felica; }; struct pn533_poll_modulations { struct { u8 maxtg; u8 brty; union pn533_cmd_poll_initdata initiator_data; } __packed data; u8 len; }; static const struct pn533_poll_modulations poll_mod[] = { [PN533_POLL_MOD_106KBPS_A] = { .data = { .maxtg = 1, .brty = 0, }, .len = 2, }, [PN533_POLL_MOD_212KBPS_FELICA] = { .data = { .maxtg = 1, .brty = 1, .initiator_data.felica = { .opcode = PN533_FELICA_OPC_SENSF_REQ, .sc = PN533_FELICA_SENSF_SC_ALL, .rc = PN533_FELICA_SENSF_RC_SYSTEM_CODE, .tsn = 0x03, }, }, .len = 7, }, [PN533_POLL_MOD_424KBPS_FELICA] = { .data = { .maxtg = 1, .brty = 2, .initiator_data.felica = { .opcode = PN533_FELICA_OPC_SENSF_REQ, .sc = PN533_FELICA_SENSF_SC_ALL, .rc = PN533_FELICA_SENSF_RC_SYSTEM_CODE, .tsn = 0x03, }, }, .len = 7, }, [PN533_POLL_MOD_106KBPS_JEWEL] = { .data = { .maxtg = 1, .brty = 4, }, .len = 2, }, [PN533_POLL_MOD_847KBPS_B] = { .data = { .maxtg = 1, .brty = 8, .initiator_data.type_b = { .afi = PN533_TYPE_B_AFI_ALL_FAMILIES, .polling_method = PN533_TYPE_B_POLL_METHOD_TIMESLOT, }, }, .len = 3, }, [PN533_LISTEN_MOD] = { .len = 0, }, }; /* PN533_CMD_IN_ATR */ struct pn533_cmd_activate_response { u8 status; u8 nfcid3t[10]; u8 didt; u8 bst; u8 brt; u8 to; u8 ppt; /* optional */ u8 gt[]; } __packed; struct pn533_cmd_jump_dep_response { u8 status; u8 tg; u8 nfcid3t[10]; u8 didt; u8 bst; u8 brt; u8 to; u8 ppt; /* optional */ u8 gt[]; } __packed; struct pn532_autopoll_resp { u8 type; u8 ln; u8 tg; u8 tgdata[]; }; /* PN532_CMD_IN_AUTOPOLL */ #define PN532_AUTOPOLL_POLLNR_INFINITE 0xff #define PN532_AUTOPOLL_PERIOD 0x03 /* in units of 150 ms */ #define PN532_AUTOPOLL_TYPE_GENERIC_106 0x00 #define PN532_AUTOPOLL_TYPE_GENERIC_212 0x01 #define PN532_AUTOPOLL_TYPE_GENERIC_424 0x02 #define PN532_AUTOPOLL_TYPE_JEWEL 0x04 #define PN532_AUTOPOLL_TYPE_MIFARE 0x10 #define PN532_AUTOPOLL_TYPE_FELICA212 0x11 #define PN532_AUTOPOLL_TYPE_FELICA424 0x12 #define PN532_AUTOPOLL_TYPE_ISOA 0x20 #define PN532_AUTOPOLL_TYPE_ISOB 0x23 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106 0x40 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212 0x41 #define PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424 0x42 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106 0x80 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_212 0x81 #define PN532_AUTOPOLL_TYPE_DEP_ACTIVE_424 0x82 /* PN533_TG_INIT_AS_TARGET */ #define PN533_INIT_TARGET_PASSIVE 0x1 #define PN533_INIT_TARGET_DEP 0x2 #define PN533_INIT_TARGET_RESP_FRAME_MASK 0x3 #define PN533_INIT_TARGET_RESP_ACTIVE 0x1 #define PN533_INIT_TARGET_RESP_DEP 0x4 /* The rule: value(high byte) + value(low byte) + checksum = 0 */ static inline u8 pn533_ext_checksum(u16 value) { return ~(u8)(((value & 0xFF00) >> 8) + (u8)(value & 0xFF)) + 1; } /* The rule: value + checksum = 0 */ static inline u8 pn533_std_checksum(u8 value) { return ~value + 1; } /* The rule: sum(data elements) + checksum = 0 */ static u8 pn533_std_data_checksum(u8 *data, int datalen) { u8 sum = 0; int i; for (i = 0; i < datalen; i++) sum += data[i]; return pn533_std_checksum(sum); } static void pn533_std_tx_frame_init(void *_frame, u8 cmd_code) { struct pn533_std_frame *frame = _frame; frame->preamble = 0; frame->start_frame = cpu_to_be16(PN533_STD_FRAME_SOF); PN533_STD_FRAME_IDENTIFIER(frame) = PN533_STD_FRAME_DIR_OUT; PN533_FRAME_CMD(frame) = cmd_code; frame->datalen = 2; } static void pn533_std_tx_frame_finish(void *_frame) { struct pn533_std_frame *frame = _frame; frame->datalen_checksum = pn533_std_checksum(frame->datalen); PN533_STD_FRAME_CHECKSUM(frame) = pn533_std_data_checksum(frame->data, frame->datalen); PN533_STD_FRAME_POSTAMBLE(frame) = 0; } static void pn533_std_tx_update_payload_len(void *_frame, int len) { struct pn533_std_frame *frame = _frame; frame->datalen += len; } static bool pn533_std_rx_frame_is_valid(void *_frame, struct pn533 *dev) { u8 checksum; struct pn533_std_frame *stdf = _frame; if (stdf->start_frame != cpu_to_be16(PN533_STD_FRAME_SOF)) return false; if (likely(!PN533_STD_IS_EXTENDED(stdf))) { /* Standard frame code */ dev->ops->rx_header_len = PN533_STD_FRAME_HEADER_LEN; checksum = pn533_std_checksum(stdf->datalen); if (checksum != stdf->datalen_checksum) return false; checksum = pn533_std_data_checksum(stdf->data, stdf->datalen); if (checksum != PN533_STD_FRAME_CHECKSUM(stdf)) return false; } else { /* Extended */ struct pn533_ext_frame *eif = _frame; dev->ops->rx_header_len = PN533_EXT_FRAME_HEADER_LEN; checksum = pn533_ext_checksum(be16_to_cpu(eif->datalen)); if (checksum != eif->datalen_checksum) return false; /* check data checksum */ checksum = pn533_std_data_checksum(eif->data, be16_to_cpu(eif->datalen)); if (checksum != PN533_EXT_FRAME_CHECKSUM(eif)) return false; } return true; } bool pn533_rx_frame_is_ack(void *_frame) { struct pn533_std_frame *frame = _frame; if (frame->start_frame != cpu_to_be16(PN533_STD_FRAME_SOF)) return false; if (frame->datalen != 0 || frame->datalen_checksum != 0xFF) return false; return true; } EXPORT_SYMBOL_GPL(pn533_rx_frame_is_ack); static inline int pn533_std_rx_frame_size(void *frame) { struct pn533_std_frame *f = frame; /* check for Extended Information frame */ if (PN533_STD_IS_EXTENDED(f)) { struct pn533_ext_frame *eif = frame; return sizeof(struct pn533_ext_frame) + be16_to_cpu(eif->datalen) + PN533_STD_FRAME_TAIL_LEN; } return sizeof(struct pn533_std_frame) + f->datalen + PN533_STD_FRAME_TAIL_LEN; } static u8 pn533_std_get_cmd_code(void *frame) { struct pn533_std_frame *f = frame; struct pn533_ext_frame *eif = frame; if (PN533_STD_IS_EXTENDED(f)) return PN533_FRAME_CMD(eif); else return PN533_FRAME_CMD(f); } bool pn533_rx_frame_is_cmd_response(struct pn533 *dev, void *frame) { return (dev->ops->get_cmd_code(frame) == PN533_CMD_RESPONSE(dev->cmd->code)); } EXPORT_SYMBOL_GPL(pn533_rx_frame_is_cmd_response); static struct pn533_frame_ops pn533_std_frame_ops = { .tx_frame_init = pn533_std_tx_frame_init, .tx_frame_finish = pn533_std_tx_frame_finish, .tx_update_payload_len = pn533_std_tx_update_payload_len, .tx_header_len = PN533_STD_FRAME_HEADER_LEN, .tx_tail_len = PN533_STD_FRAME_TAIL_LEN, .rx_is_frame_valid = pn533_std_rx_frame_is_valid, .rx_frame_size = pn533_std_rx_frame_size, .rx_header_len = PN533_STD_FRAME_HEADER_LEN, .rx_tail_len = PN533_STD_FRAME_TAIL_LEN, .max_payload_len = PN533_STD_FRAME_MAX_PAYLOAD_LEN, .get_cmd_code = pn533_std_get_cmd_code, }; static void pn533_build_cmd_frame(struct pn533 *dev, u8 cmd_code, struct sk_buff *skb) { /* payload is already there, just update datalen */ int payload_len = skb->len; struct pn533_frame_ops *ops = dev->ops; skb_push(skb, ops->tx_header_len); skb_put(skb, ops->tx_tail_len); ops->tx_frame_init(skb->data, cmd_code); ops->tx_update_payload_len(skb->data, payload_len); ops->tx_frame_finish(skb->data); } static int pn533_send_async_complete(struct pn533 *dev) { struct pn533_cmd *cmd = dev->cmd; struct sk_buff *resp; int status, rc = 0; if (!cmd) { dev_dbg(dev->dev, "%s: cmd not set\n", __func__); goto done; } dev_kfree_skb(cmd->req); status = cmd->status; resp = cmd->resp; if (status < 0) { rc = cmd->complete_cb(dev, cmd->complete_cb_context, ERR_PTR(status)); dev_kfree_skb(resp); goto done; } /* when no response is set we got interrupted */ if (!resp) resp = ERR_PTR(-EINTR); if (!IS_ERR(resp)) { skb_pull(resp, dev->ops->rx_header_len); skb_trim(resp, resp->len - dev->ops->rx_tail_len); } rc = cmd->complete_cb(dev, cmd->complete_cb_context, resp); done: kfree(cmd); dev->cmd = NULL; return rc; } static int __pn533_send_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { struct pn533_cmd *cmd; int rc = 0; dev_dbg(dev->dev, "Sending command 0x%x\n", cmd_code); cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->code = cmd_code; cmd->req = req; cmd->complete_cb = complete_cb; cmd->complete_cb_context = complete_cb_context; pn533_build_cmd_frame(dev, cmd_code, req); mutex_lock(&dev->cmd_lock); if (!dev->cmd_pending) { dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, req); if (rc) { dev->cmd = NULL; goto error; } dev->cmd_pending = 1; goto unlock; } dev_dbg(dev->dev, "%s Queueing command 0x%x\n", __func__, cmd_code); INIT_LIST_HEAD(&cmd->queue); list_add_tail(&cmd->queue, &dev->cmd_queue); goto unlock; error: kfree(cmd); unlock: mutex_unlock(&dev->cmd_lock); return rc; } static int pn533_send_data_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { return __pn533_send_async(dev, cmd_code, req, complete_cb, complete_cb_context); } static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { return __pn533_send_async(dev, cmd_code, req, complete_cb, complete_cb_context); } /* * pn533_send_cmd_direct_async * * The function sends a priority cmd directly to the chip omitting the cmd * queue. It's intended to be used by chaining mechanism of received responses * where the host has to request every single chunk of data before scheduling * next cmd from the queue. */ static int pn533_send_cmd_direct_async(struct pn533 *dev, u8 cmd_code, struct sk_buff *req, pn533_send_async_complete_t complete_cb, void *complete_cb_context) { struct pn533_cmd *cmd; int rc; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; cmd->code = cmd_code; cmd->req = req; cmd->complete_cb = complete_cb; cmd->complete_cb_context = complete_cb_context; pn533_build_cmd_frame(dev, cmd_code, req); dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, req); if (rc < 0) { dev->cmd = NULL; kfree(cmd); } return rc; } static void pn533_wq_cmd_complete(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, cmd_complete_work); int rc; rc = pn533_send_async_complete(dev); if (rc != -EINPROGRESS) queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_cmd(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, cmd_work); struct pn533_cmd *cmd; int rc; mutex_lock(&dev->cmd_lock); if (list_empty(&dev->cmd_queue)) { dev->cmd_pending = 0; mutex_unlock(&dev->cmd_lock); return; } cmd = list_first_entry(&dev->cmd_queue, struct pn533_cmd, queue); list_del(&cmd->queue); mutex_unlock(&dev->cmd_lock); dev->cmd = cmd; rc = dev->phy_ops->send_frame(dev, cmd->req); if (rc < 0) { dev->cmd = NULL; dev_kfree_skb(cmd->req); kfree(cmd); return; } } struct pn533_sync_cmd_response { struct sk_buff *resp; struct completion done; }; static int pn533_send_sync_complete(struct pn533 *dev, void *_arg, struct sk_buff *resp) { struct pn533_sync_cmd_response *arg = _arg; arg->resp = resp; complete(&arg->done); return 0; } /* pn533_send_cmd_sync * * Please note the req parameter is freed inside the function to * limit a number of return value interpretations by the caller. * * 1. negative in case of error during TX path -> req should be freed * * 2. negative in case of error during RX path -> req should not be freed * as it's been already freed at the beginning of RX path by * async_complete_cb. * * 3. valid pointer in case of successful RX path * * A caller has to check a return value with IS_ERR macro. If the test pass, * the returned pointer is valid. * */ static struct sk_buff *pn533_send_cmd_sync(struct pn533 *dev, u8 cmd_code, struct sk_buff *req) { int rc; struct pn533_sync_cmd_response arg; init_completion(&arg.done); rc = pn533_send_cmd_async(dev, cmd_code, req, pn533_send_sync_complete, &arg); if (rc) { dev_kfree_skb(req); return ERR_PTR(rc); } wait_for_completion(&arg.done); return arg.resp; } static struct sk_buff *pn533_alloc_skb(struct pn533 *dev, unsigned int size) { struct sk_buff *skb; skb = alloc_skb(dev->ops->tx_header_len + size + dev->ops->tx_tail_len, GFP_KERNEL); if (skb) skb_reserve(skb, dev->ops->tx_header_len); return skb; } struct pn533_target_type_a { __be16 sens_res; u8 sel_res; u8 nfcid_len; u8 nfcid_data[]; } __packed; #define PN533_TYPE_A_SENS_RES_NFCID1(x) ((u8)((be16_to_cpu(x) & 0x00C0) >> 6)) #define PN533_TYPE_A_SENS_RES_SSD(x) ((u8)((be16_to_cpu(x) & 0x001F) >> 0)) #define PN533_TYPE_A_SENS_RES_PLATCONF(x) ((u8)((be16_to_cpu(x) & 0x0F00) >> 8)) #define PN533_TYPE_A_SENS_RES_SSD_JEWEL 0x00 #define PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL 0x0C #define PN533_TYPE_A_SEL_PROT(x) (((x) & 0x60) >> 5) #define PN533_TYPE_A_SEL_CASCADE(x) (((x) & 0x04) >> 2) #define PN533_TYPE_A_SEL_PROT_MIFARE 0 #define PN533_TYPE_A_SEL_PROT_ISO14443 1 #define PN533_TYPE_A_SEL_PROT_DEP 2 #define PN533_TYPE_A_SEL_PROT_ISO14443_DEP 3 static bool pn533_target_type_a_is_valid(struct pn533_target_type_a *type_a, int target_data_len) { u8 ssd; u8 platconf; if (target_data_len < sizeof(struct pn533_target_type_a)) return false; /* * The length check of nfcid[] and ats[] are not being performed because * the values are not being used */ /* Requirement 4.6.3.3 from NFC Forum Digital Spec */ ssd = PN533_TYPE_A_SENS_RES_SSD(type_a->sens_res); platconf = PN533_TYPE_A_SENS_RES_PLATCONF(type_a->sens_res); if ((ssd == PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf != PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL) || (ssd != PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf == PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL)) return false; /* Requirements 4.8.2.1, 4.8.2.3, 4.8.2.5 and 4.8.2.7 from NFC Forum */ if (PN533_TYPE_A_SEL_CASCADE(type_a->sel_res) != 0) return false; if (type_a->nfcid_len > NFC_NFCID1_MAXSIZE) return false; return true; } static int pn533_target_found_type_a(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_type_a *tgt_type_a; tgt_type_a = (struct pn533_target_type_a *)tgt_data; if (!pn533_target_type_a_is_valid(tgt_type_a, tgt_data_len)) return -EPROTO; switch (PN533_TYPE_A_SEL_PROT(tgt_type_a->sel_res)) { case PN533_TYPE_A_SEL_PROT_MIFARE: nfc_tgt->supported_protocols = NFC_PROTO_MIFARE_MASK; break; case PN533_TYPE_A_SEL_PROT_ISO14443: nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_MASK; break; case PN533_TYPE_A_SEL_PROT_DEP: nfc_tgt->supported_protocols = NFC_PROTO_NFC_DEP_MASK; break; case PN533_TYPE_A_SEL_PROT_ISO14443_DEP: nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_MASK | NFC_PROTO_NFC_DEP_MASK; break; } nfc_tgt->sens_res = be16_to_cpu(tgt_type_a->sens_res); nfc_tgt->sel_res = tgt_type_a->sel_res; nfc_tgt->nfcid1_len = tgt_type_a->nfcid_len; memcpy(nfc_tgt->nfcid1, tgt_type_a->nfcid_data, nfc_tgt->nfcid1_len); return 0; } struct pn533_target_felica { u8 pol_res; u8 opcode; u8 nfcid2[NFC_NFCID2_MAXSIZE]; u8 pad[8]; /* optional */ u8 syst_code[]; } __packed; #define PN533_FELICA_SENSF_NFCID2_DEP_B1 0x01 #define PN533_FELICA_SENSF_NFCID2_DEP_B2 0xFE static bool pn533_target_felica_is_valid(struct pn533_target_felica *felica, int target_data_len) { if (target_data_len < sizeof(struct pn533_target_felica)) return false; if (felica->opcode != PN533_FELICA_OPC_SENSF_RES) return false; return true; } static int pn533_target_found_felica(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_felica *tgt_felica; tgt_felica = (struct pn533_target_felica *)tgt_data; if (!pn533_target_felica_is_valid(tgt_felica, tgt_data_len)) return -EPROTO; if ((tgt_felica->nfcid2[0] == PN533_FELICA_SENSF_NFCID2_DEP_B1) && (tgt_felica->nfcid2[1] == PN533_FELICA_SENSF_NFCID2_DEP_B2)) nfc_tgt->supported_protocols = NFC_PROTO_NFC_DEP_MASK; else nfc_tgt->supported_protocols = NFC_PROTO_FELICA_MASK; memcpy(nfc_tgt->sensf_res, &tgt_felica->opcode, 9); nfc_tgt->sensf_res_len = 9; memcpy(nfc_tgt->nfcid2, tgt_felica->nfcid2, NFC_NFCID2_MAXSIZE); nfc_tgt->nfcid2_len = NFC_NFCID2_MAXSIZE; return 0; } struct pn533_target_jewel { __be16 sens_res; u8 jewelid[4]; } __packed; static bool pn533_target_jewel_is_valid(struct pn533_target_jewel *jewel, int target_data_len) { u8 ssd; u8 platconf; if (target_data_len < sizeof(struct pn533_target_jewel)) return false; /* Requirement 4.6.3.3 from NFC Forum Digital Spec */ ssd = PN533_TYPE_A_SENS_RES_SSD(jewel->sens_res); platconf = PN533_TYPE_A_SENS_RES_PLATCONF(jewel->sens_res); if ((ssd == PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf != PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL) || (ssd != PN533_TYPE_A_SENS_RES_SSD_JEWEL && platconf == PN533_TYPE_A_SENS_RES_PLATCONF_JEWEL)) return false; return true; } static int pn533_target_found_jewel(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_jewel *tgt_jewel; tgt_jewel = (struct pn533_target_jewel *)tgt_data; if (!pn533_target_jewel_is_valid(tgt_jewel, tgt_data_len)) return -EPROTO; nfc_tgt->supported_protocols = NFC_PROTO_JEWEL_MASK; nfc_tgt->sens_res = be16_to_cpu(tgt_jewel->sens_res); nfc_tgt->nfcid1_len = 4; memcpy(nfc_tgt->nfcid1, tgt_jewel->jewelid, nfc_tgt->nfcid1_len); return 0; } struct pn533_type_b_prot_info { u8 bitrate; u8 fsci_type; u8 fwi_adc_fo; } __packed; #define PN533_TYPE_B_PROT_FCSI(x) (((x) & 0xF0) >> 4) #define PN533_TYPE_B_PROT_TYPE(x) (((x) & 0x0F) >> 0) #define PN533_TYPE_B_PROT_TYPE_RFU_MASK 0x8 struct pn533_type_b_sens_res { u8 opcode; u8 nfcid[4]; u8 appdata[4]; struct pn533_type_b_prot_info prot_info; } __packed; #define PN533_TYPE_B_OPC_SENSB_RES 0x50 struct pn533_target_type_b { struct pn533_type_b_sens_res sensb_res; u8 attrib_res_len; u8 attrib_res[]; } __packed; static bool pn533_target_type_b_is_valid(struct pn533_target_type_b *type_b, int target_data_len) { if (target_data_len < sizeof(struct pn533_target_type_b)) return false; if (type_b->sensb_res.opcode != PN533_TYPE_B_OPC_SENSB_RES) return false; if (PN533_TYPE_B_PROT_TYPE(type_b->sensb_res.prot_info.fsci_type) & PN533_TYPE_B_PROT_TYPE_RFU_MASK) return false; return true; } static int pn533_target_found_type_b(struct nfc_target *nfc_tgt, u8 *tgt_data, int tgt_data_len) { struct pn533_target_type_b *tgt_type_b; tgt_type_b = (struct pn533_target_type_b *)tgt_data; if (!pn533_target_type_b_is_valid(tgt_type_b, tgt_data_len)) return -EPROTO; nfc_tgt->supported_protocols = NFC_PROTO_ISO14443_B_MASK; return 0; } static void pn533_poll_reset_mod_list(struct pn533 *dev); static int pn533_target_found(struct pn533 *dev, u8 tg, u8 *tgdata, int tgdata_len) { struct nfc_target nfc_tgt; int rc; dev_dbg(dev->dev, "%s: modulation=%d\n", __func__, dev->poll_mod_curr); if (tg != 1) return -EPROTO; memset(&nfc_tgt, 0, sizeof(struct nfc_target)); switch (dev->poll_mod_curr) { case PN533_POLL_MOD_106KBPS_A: rc = pn533_target_found_type_a(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_212KBPS_FELICA: case PN533_POLL_MOD_424KBPS_FELICA: rc = pn533_target_found_felica(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_106KBPS_JEWEL: rc = pn533_target_found_jewel(&nfc_tgt, tgdata, tgdata_len); break; case PN533_POLL_MOD_847KBPS_B: rc = pn533_target_found_type_b(&nfc_tgt, tgdata, tgdata_len); break; default: nfc_err(dev->dev, "Unknown current poll modulation\n"); return -EPROTO; } if (rc) return rc; if (!(nfc_tgt.supported_protocols & dev->poll_protocols)) { dev_dbg(dev->dev, "The Tg found doesn't have the desired protocol\n"); return -EAGAIN; } dev_dbg(dev->dev, "Target found - supported protocols: 0x%x\n", nfc_tgt.supported_protocols); dev->tgt_available_prots = nfc_tgt.supported_protocols; pn533_poll_reset_mod_list(dev); nfc_targets_found(dev->nfc_dev, &nfc_tgt, 1); return 0; } static inline void pn533_poll_next_mod(struct pn533 *dev) { dev->poll_mod_curr = (dev->poll_mod_curr + 1) % dev->poll_mod_count; } static void pn533_poll_reset_mod_list(struct pn533 *dev) { dev->poll_mod_count = 0; } static void pn533_poll_add_mod(struct pn533 *dev, u8 mod_index) { dev->poll_mod_active[dev->poll_mod_count] = (struct pn533_poll_modulations *)&poll_mod[mod_index]; dev->poll_mod_count++; } static void pn533_poll_create_mod_list(struct pn533 *dev, u32 im_protocols, u32 tm_protocols) { pn533_poll_reset_mod_list(dev); if ((im_protocols & NFC_PROTO_MIFARE_MASK) || (im_protocols & NFC_PROTO_ISO14443_MASK) || (im_protocols & NFC_PROTO_NFC_DEP_MASK)) pn533_poll_add_mod(dev, PN533_POLL_MOD_106KBPS_A); if (im_protocols & NFC_PROTO_FELICA_MASK || im_protocols & NFC_PROTO_NFC_DEP_MASK) { pn533_poll_add_mod(dev, PN533_POLL_MOD_212KBPS_FELICA); pn533_poll_add_mod(dev, PN533_POLL_MOD_424KBPS_FELICA); } if (im_protocols & NFC_PROTO_JEWEL_MASK) pn533_poll_add_mod(dev, PN533_POLL_MOD_106KBPS_JEWEL); if (im_protocols & NFC_PROTO_ISO14443_B_MASK) pn533_poll_add_mod(dev, PN533_POLL_MOD_847KBPS_B); if (tm_protocols) pn533_poll_add_mod(dev, PN533_LISTEN_MOD); } static int pn533_start_poll_complete(struct pn533 *dev, struct sk_buff *resp) { u8 nbtg, tg, *tgdata; int rc, tgdata_len; /* Toggle the DEP polling */ if (dev->poll_protocols & NFC_PROTO_NFC_DEP_MASK) dev->poll_dep = 1; nbtg = resp->data[0]; tg = resp->data[1]; tgdata = &resp->data[2]; tgdata_len = resp->len - 2; /* nbtg + tg */ if (nbtg) { rc = pn533_target_found(dev, tg, tgdata, tgdata_len); /* We must stop the poll after a valid target found */ if (rc == 0) return 0; } return -EAGAIN; } static struct sk_buff *pn533_alloc_poll_tg_frame(struct pn533 *dev) { struct sk_buff *skb; u8 *felica, *nfcid3; u8 *gbytes = dev->gb; size_t gbytes_len = dev->gb_len; u8 felica_params[18] = {0x1, 0xfe, /* DEP */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, /* random */ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff}; /* System code */ u8 mifare_params[6] = {0x1, 0x1, /* SENS_RES */ 0x0, 0x0, 0x0, 0x40}; /* SEL_RES for DEP */ unsigned int skb_len = 36 + /* * mode (1), mifare (6), * felica (18), nfcid3 (10), gb_len (1) */ gbytes_len + 1; /* len Tk*/ skb = pn533_alloc_skb(dev, skb_len); if (!skb) return NULL; /* DEP support only */ skb_put_u8(skb, PN533_INIT_TARGET_DEP); /* MIFARE params */ skb_put_data(skb, mifare_params, 6); /* Felica params */ felica = skb_put_data(skb, felica_params, 18); get_random_bytes(felica + 2, 6); /* NFCID3 */ nfcid3 = skb_put_zero(skb, 10); memcpy(nfcid3, felica, 8); /* General bytes */ skb_put_u8(skb, gbytes_len); skb_put_data(skb, gbytes, gbytes_len); /* Len Tk */ skb_put_u8(skb, 0); return skb; } static void pn533_wq_tm_mi_recv(struct work_struct *work); static struct sk_buff *pn533_build_response(struct pn533 *dev); static int pn533_tm_get_data_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct sk_buff *skb; u8 status, ret, mi; int rc; if (IS_ERR(resp)) { skb_queue_purge(&dev->resp_q); return PTR_ERR(resp); } status = resp->data[0]; ret = status & PN533_CMD_RET_MASK; mi = status & PN533_CMD_MI_MASK; skb_pull(resp, sizeof(status)); if (ret != PN533_CMD_RET_SUCCESS) { rc = -EIO; goto error; } skb_queue_tail(&dev->resp_q, resp); if (mi) { queue_work(dev->wq, &dev->mi_tm_rx_work); return -EINPROGRESS; } skb = pn533_build_response(dev); if (!skb) { rc = -EIO; goto error; } return nfc_tm_data_received(dev->nfc_dev, skb); error: nfc_tm_deactivated(dev->nfc_dev); dev->tgt_mode = 0; skb_queue_purge(&dev->resp_q); dev_kfree_skb(resp); return rc; } static void pn533_wq_tm_mi_recv(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tm_rx_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 0); if (!skb) return; rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_GET_DATA, skb, pn533_tm_get_data_complete, NULL); if (rc < 0) dev_kfree_skb(skb); } static int pn533_tm_send_complete(struct pn533 *dev, void *arg, struct sk_buff *resp); static void pn533_wq_tm_mi_send(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tm_tx_work); struct sk_buff *skb; int rc; /* Grab the first skb in the queue */ skb = skb_dequeue(&dev->fragment_skb); if (skb == NULL) { /* No more data */ /* Reset the queue for future use */ skb_queue_head_init(&dev->fragment_skb); goto error; } /* last entry - remove MI bit */ if (skb_queue_len(&dev->fragment_skb) == 0) { rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_SET_DATA, skb, pn533_tm_send_complete, NULL); } else rc = pn533_send_cmd_direct_async(dev, PN533_CMD_TG_SET_META_DATA, skb, pn533_tm_send_complete, NULL); if (rc == 0) /* success */ return; dev_err(dev->dev, "Error %d when trying to perform set meta data_exchange", rc); dev_kfree_skb(skb); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_tg_get_data(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, tg_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 0); if (!skb) return; rc = pn533_send_data_async(dev, PN533_CMD_TG_GET_DATA, skb, pn533_tm_get_data_complete, NULL); if (rc < 0) dev_kfree_skb(skb); } #define ATR_REQ_GB_OFFSET 17 static int pn533_init_target_complete(struct pn533 *dev, struct sk_buff *resp) { u8 mode, *cmd, comm_mode = NFC_COMM_PASSIVE, *gb; size_t gb_len; int rc; if (resp->len < ATR_REQ_GB_OFFSET + 1) return -EINVAL; mode = resp->data[0]; cmd = &resp->data[1]; dev_dbg(dev->dev, "Target mode 0x%x len %d\n", mode, resp->len); if ((mode & PN533_INIT_TARGET_RESP_FRAME_MASK) == PN533_INIT_TARGET_RESP_ACTIVE) comm_mode = NFC_COMM_ACTIVE; if ((mode & PN533_INIT_TARGET_RESP_DEP) == 0) /* Only DEP supported */ return -EOPNOTSUPP; gb = cmd + ATR_REQ_GB_OFFSET; gb_len = resp->len - (ATR_REQ_GB_OFFSET + 1); rc = nfc_tm_activated(dev->nfc_dev, NFC_PROTO_NFC_DEP_MASK, comm_mode, gb, gb_len); if (rc < 0) { nfc_err(dev->dev, "Error when signaling target activation\n"); return rc; } dev->tgt_mode = 1; queue_work(dev->wq, &dev->tg_work); return 0; } static void pn533_listen_mode_timer(struct timer_list *t) { struct pn533 *dev = from_timer(dev, t, listen_timer); dev->cancel_listen = 1; pn533_poll_next_mod(dev); queue_delayed_work(dev->wq, &dev->poll_work, msecs_to_jiffies(PN533_POLL_INTERVAL)); } static int pn533_rf_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { int rc = 0; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "RF setting error %d\n", rc); return rc; } queue_delayed_work(dev->wq, &dev->poll_work, msecs_to_jiffies(PN533_POLL_INTERVAL)); dev_kfree_skb(resp); return rc; } static void pn533_wq_rf(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, rf_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, 2); if (!skb) return; skb_put_u8(skb, PN533_CFGITEM_RF_FIELD); skb_put_u8(skb, PN533_CFGITEM_RF_FIELD_AUTO_RFCA); rc = pn533_send_cmd_async(dev, PN533_CMD_RF_CONFIGURATION, skb, pn533_rf_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "RF setting error %d\n", rc); } } static int pn533_poll_dep_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn533_cmd_jump_dep_response *rsp; struct nfc_target nfc_target; u8 target_gt_len; int rc; if (IS_ERR(resp)) return PTR_ERR(resp); memset(&nfc_target, 0, sizeof(struct nfc_target)); rsp = (struct pn533_cmd_jump_dep_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { /* Not target found, turn radio off */ queue_work(dev->wq, &dev->rf_work); dev_kfree_skb(resp); return 0; } dev_dbg(dev->dev, "Creating new target"); nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK; nfc_target.nfcid1_len = 10; memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len); rc = nfc_targets_found(dev->nfc_dev, &nfc_target, 1); if (rc) goto error; dev->tgt_available_prots = 0; dev->tgt_active_prot = NFC_PROTO_NFC_DEP; /* ATR_RES general bytes are located at offset 17 */ target_gt_len = resp->len - 17; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, target_gt_len); if (!rc) { rc = nfc_dep_link_is_up(dev->nfc_dev, dev->nfc_dev->targets[0].idx, 0, NFC_RF_INITIATOR); if (!rc) pn533_poll_reset_mod_list(dev); } error: dev_kfree_skb(resp); return rc; } #define PASSIVE_DATA_LEN 5 static int pn533_poll_dep(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc, skb_len; u8 *next, nfcid3[NFC_NFCID3_MAXSIZE]; u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3}; if (!dev->gb) { dev->gb = nfc_get_local_general_bytes(nfc_dev, &dev->gb_len); if (!dev->gb || !dev->gb_len) { dev->poll_dep = 0; queue_work(dev->wq, &dev->rf_work); } } skb_len = 3 + dev->gb_len; /* ActPass + BR + Next */ skb_len += PASSIVE_DATA_LEN; /* NFCID3 */ skb_len += NFC_NFCID3_MAXSIZE; nfcid3[0] = 0x1; nfcid3[1] = 0xfe; get_random_bytes(nfcid3 + 2, 6); skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x01); /* Active */ skb_put_u8(skb, 0x02); /* 424 kbps */ next = skb_put(skb, 1); /* Next */ *next = 0; /* Copy passive data */ skb_put_data(skb, passive_data, PASSIVE_DATA_LEN); *next |= 1; /* Copy NFCID3 (which is NFCID2 from SENSF_RES) */ skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE); *next |= 2; skb_put_data(skb, dev->gb, dev->gb_len); *next |= 4; /* We have some Gi */ rc = pn533_send_cmd_async(dev, PN533_CMD_IN_JUMP_FOR_DEP, skb, pn533_poll_dep_complete, NULL); if (rc < 0) dev_kfree_skb(skb); return rc; } static int pn533_autopoll_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn532_autopoll_resp *apr; struct nfc_target nfc_tgt; u8 nbtg; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "%s autopoll complete error %d\n", __func__, rc); if (rc == -ENOENT) { if (dev->poll_mod_count != 0) return rc; goto stop_poll; } else if (rc < 0) { nfc_err(dev->dev, "Error %d when running autopoll\n", rc); goto stop_poll; } } nbtg = resp->data[0]; if ((nbtg > 2) || (nbtg <= 0)) return -EAGAIN; apr = (struct pn532_autopoll_resp *)&resp->data[1]; while (nbtg--) { memset(&nfc_tgt, 0, sizeof(struct nfc_target)); switch (apr->type) { case PN532_AUTOPOLL_TYPE_ISOA: dev_dbg(dev->dev, "ISOA\n"); rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_FELICA212: case PN532_AUTOPOLL_TYPE_FELICA424: dev_dbg(dev->dev, "FELICA\n"); rc = pn533_target_found_felica(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_JEWEL: dev_dbg(dev->dev, "JEWEL\n"); rc = pn533_target_found_jewel(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_ISOB: dev_dbg(dev->dev, "ISOB\n"); rc = pn533_target_found_type_b(&nfc_tgt, apr->tgdata, apr->ln - 1); break; case PN532_AUTOPOLL_TYPE_MIFARE: dev_dbg(dev->dev, "Mifare\n"); rc = pn533_target_found_type_a(&nfc_tgt, apr->tgdata, apr->ln - 1); break; default: nfc_err(dev->dev, "Unknown current poll modulation\n"); rc = -EPROTO; } if (rc) goto done; if (!(nfc_tgt.supported_protocols & dev->poll_protocols)) { nfc_err(dev->dev, "The Tg found doesn't have the desired protocol\n"); rc = -EAGAIN; goto done; } dev->tgt_available_prots = nfc_tgt.supported_protocols; apr = (struct pn532_autopoll_resp *) (apr->tgdata + (apr->ln - 1)); } pn533_poll_reset_mod_list(dev); nfc_targets_found(dev->nfc_dev, &nfc_tgt, 1); done: dev_kfree_skb(resp); return rc; stop_poll: nfc_err(dev->dev, "autopoll operation has been stopped\n"); pn533_poll_reset_mod_list(dev); dev->poll_protocols = 0; return rc; } static int pn533_poll_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn533_poll_modulations *cur_mod; int rc; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "%s Poll complete error %d\n", __func__, rc); if (rc == -ENOENT) { if (dev->poll_mod_count != 0) return rc; goto stop_poll; } else if (rc < 0) { nfc_err(dev->dev, "Error %d when running poll\n", rc); goto stop_poll; } } cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; if (cur_mod->len == 0) { /* Target mode */ del_timer(&dev->listen_timer); rc = pn533_init_target_complete(dev, resp); goto done; } /* Initiator mode */ rc = pn533_start_poll_complete(dev, resp); if (!rc) goto done; if (!dev->poll_mod_count) { dev_dbg(dev->dev, "Polling has been stopped\n"); goto done; } pn533_poll_next_mod(dev); /* Not target found, turn radio off */ queue_work(dev->wq, &dev->rf_work); done: dev_kfree_skb(resp); return rc; stop_poll: nfc_err(dev->dev, "Polling operation has been stopped\n"); pn533_poll_reset_mod_list(dev); dev->poll_protocols = 0; return rc; } static struct sk_buff *pn533_alloc_poll_in_frame(struct pn533 *dev, struct pn533_poll_modulations *mod) { struct sk_buff *skb; skb = pn533_alloc_skb(dev, mod->len); if (!skb) return NULL; skb_put_data(skb, &mod->data, mod->len); return skb; } static int pn533_send_poll_frame(struct pn533 *dev) { struct pn533_poll_modulations *mod; struct sk_buff *skb; int rc; u8 cmd_code; mod = dev->poll_mod_active[dev->poll_mod_curr]; dev_dbg(dev->dev, "%s mod len %d\n", __func__, mod->len); if ((dev->poll_protocols & NFC_PROTO_NFC_DEP_MASK) && dev->poll_dep) { dev->poll_dep = 0; return pn533_poll_dep(dev->nfc_dev); } if (mod->len == 0) { /* Listen mode */ cmd_code = PN533_CMD_TG_INIT_AS_TARGET; skb = pn533_alloc_poll_tg_frame(dev); } else { /* Polling mode */ cmd_code = PN533_CMD_IN_LIST_PASSIVE_TARGET; skb = pn533_alloc_poll_in_frame(dev, mod); } if (!skb) { nfc_err(dev->dev, "Failed to allocate skb\n"); return -ENOMEM; } rc = pn533_send_cmd_async(dev, cmd_code, skb, pn533_poll_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "Polling loop error %d\n", rc); } return rc; } static void pn533_wq_poll(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, poll_work.work); struct pn533_poll_modulations *cur_mod; int rc; cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; dev_dbg(dev->dev, "%s cancel_listen %d modulation len %d\n", __func__, dev->cancel_listen, cur_mod->len); if (dev->cancel_listen == 1) { dev->cancel_listen = 0; dev->phy_ops->abort_cmd(dev, GFP_ATOMIC); } rc = pn533_send_poll_frame(dev); if (rc) return; if (cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, jiffies + PN533_LISTEN_TIME * HZ); } static int pn533_start_poll(struct nfc_dev *nfc_dev, u32 im_protocols, u32 tm_protocols) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_poll_modulations *cur_mod; struct sk_buff *skb; u8 rand_mod; int rc; dev_dbg(dev->dev, "%s: im protocols 0x%x tm protocols 0x%x\n", __func__, im_protocols, tm_protocols); if (dev->tgt_active_prot) { nfc_err(dev->dev, "Cannot poll with a target already activated\n"); return -EBUSY; } if (dev->tgt_mode) { nfc_err(dev->dev, "Cannot poll while already being activated\n"); return -EBUSY; } if (tm_protocols) { dev->gb = nfc_get_local_general_bytes(nfc_dev, &dev->gb_len); if (dev->gb == NULL) tm_protocols = 0; } dev->poll_protocols = im_protocols; dev->listen_protocols = tm_protocols; if (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL) { skb = pn533_alloc_skb(dev, 4 + 6); if (!skb) return -ENOMEM; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_POLLNR_INFINITE; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_PERIOD; if ((im_protocols & NFC_PROTO_MIFARE_MASK) && (im_protocols & NFC_PROTO_ISO14443_MASK) && (im_protocols & NFC_PROTO_NFC_DEP_MASK)) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_GENERIC_106; else { if (im_protocols & NFC_PROTO_MIFARE_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_MIFARE; if (im_protocols & NFC_PROTO_ISO14443_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_ISOA; if (im_protocols & NFC_PROTO_NFC_DEP_MASK) { *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_106; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_212; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_PASSIVE_424; } } if (im_protocols & NFC_PROTO_FELICA_MASK || im_protocols & NFC_PROTO_NFC_DEP_MASK) { *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_FELICA212; *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_FELICA424; } if (im_protocols & NFC_PROTO_JEWEL_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_JEWEL; if (im_protocols & NFC_PROTO_ISO14443_B_MASK) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_ISOB; if (tm_protocols) *((u8 *)skb_put(skb, sizeof(u8))) = PN532_AUTOPOLL_TYPE_DEP_ACTIVE_106; rc = pn533_send_cmd_async(dev, PN533_CMD_IN_AUTOPOLL, skb, pn533_autopoll_complete, NULL); if (rc < 0) dev_kfree_skb(skb); else dev->poll_mod_count++; return rc; } pn533_poll_create_mod_list(dev, im_protocols, tm_protocols); /* Do not always start polling from the same modulation */ get_random_bytes(&rand_mod, sizeof(rand_mod)); rand_mod %= dev->poll_mod_count; dev->poll_mod_curr = rand_mod; cur_mod = dev->poll_mod_active[dev->poll_mod_curr]; rc = pn533_send_poll_frame(dev); /* Start listen timer */ if (!rc && cur_mod->len == 0 && dev->poll_mod_count > 1) mod_timer(&dev->listen_timer, jiffies + PN533_LISTEN_TIME * HZ); return rc; } static void pn533_stop_poll(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); del_timer(&dev->listen_timer); if (!dev->poll_mod_count) { dev_dbg(dev->dev, "Polling operation was not running\n"); return; } dev->phy_ops->abort_cmd(dev, GFP_KERNEL); flush_delayed_work(&dev->poll_work); pn533_poll_reset_mod_list(dev); } static int pn533_activate_target_nfcdep(struct pn533 *dev) { struct pn533_cmd_activate_response *rsp; u16 gt_len; int rc; struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, sizeof(u8) * 2); /*TG + Next*/ if (!skb) return -ENOMEM; skb_put_u8(skb, 1); /* TG */ skb_put_u8(skb, 0); /* Next */ resp = pn533_send_cmd_sync(dev, PN533_CMD_IN_ATR, skb); if (IS_ERR(resp)) return PTR_ERR(resp); rsp = (struct pn533_cmd_activate_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Target activation failed (error 0x%x)\n", rc); dev_kfree_skb(resp); return -EIO; } /* ATR_RES general bytes are located at offset 16 */ gt_len = resp->len - 16; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, gt_len); dev_kfree_skb(resp); return rc; } static int pn533_activate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u32 protocol) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; dev_dbg(dev->dev, "%s: protocol=%u\n", __func__, protocol); if (dev->poll_mod_count) { nfc_err(dev->dev, "Cannot activate while polling\n"); return -EBUSY; } if (dev->tgt_active_prot) { nfc_err(dev->dev, "There is already an active target\n"); return -EBUSY; } if (!dev->tgt_available_prots) { nfc_err(dev->dev, "There is no available target to activate\n"); return -EINVAL; } if (!(dev->tgt_available_prots & (1 << protocol))) { nfc_err(dev->dev, "Target doesn't support requested proto %u\n", protocol); return -EINVAL; } if (protocol == NFC_PROTO_NFC_DEP) { rc = pn533_activate_target_nfcdep(dev); if (rc) { nfc_err(dev->dev, "Activating target with DEP failed %d\n", rc); return rc; } } dev->tgt_active_prot = protocol; dev->tgt_available_prots = 0; return 0; } static int pn533_deactivate_target_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { int rc = 0; if (IS_ERR(resp)) { rc = PTR_ERR(resp); nfc_err(dev->dev, "Target release error %d\n", rc); return rc; } rc = resp->data[0] & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) nfc_err(dev->dev, "Error 0x%x when releasing the target\n", rc); dev_kfree_skb(resp); return rc; } static void pn533_deactivate_target(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 mode) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc; if (!dev->tgt_active_prot) { nfc_err(dev->dev, "There is no active target\n"); return; } dev->tgt_active_prot = 0; skb_queue_purge(&dev->resp_q); skb = pn533_alloc_skb(dev, sizeof(u8)); if (!skb) return; skb_put_u8(skb, 1); /* TG*/ rc = pn533_send_cmd_async(dev, PN533_CMD_IN_RELEASE, skb, pn533_deactivate_target_complete, NULL); if (rc < 0) { dev_kfree_skb(skb); nfc_err(dev->dev, "Target release error %d\n", rc); } } static int pn533_in_dep_link_up_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { struct pn533_cmd_jump_dep_response *rsp; u8 target_gt_len; int rc; u8 active = *(u8 *)arg; kfree(arg); if (IS_ERR(resp)) return PTR_ERR(resp); if (dev->tgt_available_prots && !(dev->tgt_available_prots & (1 << NFC_PROTO_NFC_DEP))) { nfc_err(dev->dev, "The target does not support DEP\n"); rc = -EINVAL; goto error; } rsp = (struct pn533_cmd_jump_dep_response *)resp->data; rc = rsp->status & PN533_CMD_RET_MASK; if (rc != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Bringing DEP link up failed (error 0x%x)\n", rc); goto error; } if (!dev->tgt_available_prots) { struct nfc_target nfc_target; dev_dbg(dev->dev, "Creating new target\n"); memset(&nfc_target, 0, sizeof(struct nfc_target)); nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK; nfc_target.nfcid1_len = 10; memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len); rc = nfc_targets_found(dev->nfc_dev, &nfc_target, 1); if (rc) goto error; dev->tgt_available_prots = 0; } dev->tgt_active_prot = NFC_PROTO_NFC_DEP; /* ATR_RES general bytes are located at offset 17 */ target_gt_len = resp->len - 17; rc = nfc_set_remote_general_bytes(dev->nfc_dev, rsp->gt, target_gt_len); if (rc == 0) rc = nfc_dep_link_is_up(dev->nfc_dev, dev->nfc_dev->targets[0].idx, !active, NFC_RF_INITIATOR); error: dev_kfree_skb(resp); return rc; } static int pn533_rf_field(struct nfc_dev *nfc_dev, u8 rf); static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, u8 comm_mode, u8 *gb, size_t gb_len) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; int rc, skb_len; u8 *next, *arg, nfcid3[NFC_NFCID3_MAXSIZE]; u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3}; if (dev->poll_mod_count) { nfc_err(dev->dev, "Cannot bring the DEP link up while polling\n"); return -EBUSY; } if (dev->tgt_active_prot) { nfc_err(dev->dev, "There is already an active target\n"); return -EBUSY; } skb_len = 3 + gb_len; /* ActPass + BR + Next */ skb_len += PASSIVE_DATA_LEN; /* NFCID3 */ skb_len += NFC_NFCID3_MAXSIZE; if (target && !target->nfcid2_len) { nfcid3[0] = 0x1; nfcid3[1] = 0xfe; get_random_bytes(nfcid3 + 2, 6); } skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, !comm_mode); /* ActPass */ skb_put_u8(skb, 0x02); /* 424 kbps */ next = skb_put(skb, 1); /* Next */ *next = 0; /* Copy passive data */ skb_put_data(skb, passive_data, PASSIVE_DATA_LEN); *next |= 1; /* Copy NFCID3 (which is NFCID2 from SENSF_RES) */ if (target && target->nfcid2_len) memcpy(skb_put(skb, NFC_NFCID3_MAXSIZE), target->nfcid2, target->nfcid2_len); else skb_put_data(skb, nfcid3, NFC_NFCID3_MAXSIZE); *next |= 2; if (gb != NULL && gb_len > 0) { skb_put_data(skb, gb, gb_len); *next |= 4; /* We have some Gi */ } else { *next = 0; } arg = kmalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { dev_kfree_skb(skb); return -ENOMEM; } *arg = !comm_mode; pn533_rf_field(dev->nfc_dev, 0); rc = pn533_send_cmd_async(dev, PN533_CMD_IN_JUMP_FOR_DEP, skb, pn533_in_dep_link_up_complete, arg); if (rc < 0) { dev_kfree_skb(skb); kfree(arg); } return rc; } static int pn533_dep_link_down(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); pn533_poll_reset_mod_list(dev); if (dev->tgt_mode || dev->tgt_active_prot) dev->phy_ops->abort_cmd(dev, GFP_KERNEL); dev->tgt_active_prot = 0; dev->tgt_mode = 0; skb_queue_purge(&dev->resp_q); return 0; } struct pn533_data_exchange_arg { data_exchange_cb_t cb; void *cb_context; }; static struct sk_buff *pn533_build_response(struct pn533 *dev) { struct sk_buff *skb, *tmp, *t; unsigned int skb_len = 0, tmp_len = 0; if (skb_queue_empty(&dev->resp_q)) return NULL; if (skb_queue_len(&dev->resp_q) == 1) { skb = skb_dequeue(&dev->resp_q); goto out; } skb_queue_walk_safe(&dev->resp_q, tmp, t) skb_len += tmp->len; dev_dbg(dev->dev, "%s total length %d\n", __func__, skb_len); skb = alloc_skb(skb_len, GFP_KERNEL); if (skb == NULL) goto out; skb_put(skb, skb_len); skb_queue_walk_safe(&dev->resp_q, tmp, t) { memcpy(skb->data + tmp_len, tmp->data, tmp->len); tmp_len += tmp->len; } out: skb_queue_purge(&dev->resp_q); return skb; } static int pn533_data_exchange_complete(struct pn533 *dev, void *_arg, struct sk_buff *resp) { struct pn533_data_exchange_arg *arg = _arg; struct sk_buff *skb; int rc = 0; u8 status, ret, mi; if (IS_ERR(resp)) { rc = PTR_ERR(resp); goto _error; } status = resp->data[0]; ret = status & PN533_CMD_RET_MASK; mi = status & PN533_CMD_MI_MASK; skb_pull(resp, sizeof(status)); if (ret != PN533_CMD_RET_SUCCESS) { nfc_err(dev->dev, "Exchanging data failed (error 0x%x)\n", ret); rc = -EIO; goto error; } skb_queue_tail(&dev->resp_q, resp); if (mi) { dev->cmd_complete_mi_arg = arg; queue_work(dev->wq, &dev->mi_rx_work); return -EINPROGRESS; } /* Prepare for the next round */ if (skb_queue_len(&dev->fragment_skb) > 0) { dev->cmd_complete_dep_arg = arg; queue_work(dev->wq, &dev->mi_tx_work); return -EINPROGRESS; } skb = pn533_build_response(dev); if (!skb) { rc = -ENOMEM; goto error; } arg->cb(arg->cb_context, skb, 0); kfree(arg); return 0; error: dev_kfree_skb(resp); _error: skb_queue_purge(&dev->resp_q); arg->cb(arg->cb_context, NULL, rc); kfree(arg); return rc; } /* * Receive an incoming pn533 frame. skb contains only header and payload. * If skb == NULL, it is a notification that the link below is dead. */ void pn533_recv_frame(struct pn533 *dev, struct sk_buff *skb, int status) { if (!dev->cmd) goto sched_wq; dev->cmd->status = status; if (status != 0) { dev_dbg(dev->dev, "%s: Error received: %d\n", __func__, status); goto sched_wq; } if (skb == NULL) { dev_err(dev->dev, "NULL Frame -> link is dead\n"); goto sched_wq; } if (pn533_rx_frame_is_ack(skb->data)) { dev_dbg(dev->dev, "%s: Received ACK frame\n", __func__); dev_kfree_skb(skb); return; } print_hex_dump_debug("PN533 RX: ", DUMP_PREFIX_NONE, 16, 1, skb->data, dev->ops->rx_frame_size(skb->data), false); if (!dev->ops->rx_is_frame_valid(skb->data, dev)) { nfc_err(dev->dev, "Received an invalid frame\n"); dev->cmd->status = -EIO; } else if (!pn533_rx_frame_is_cmd_response(dev, skb->data)) { nfc_err(dev->dev, "It it not the response to the last command\n"); dev->cmd->status = -EIO; } dev->cmd->resp = skb; sched_wq: queue_work(dev->wq, &dev->cmd_complete_work); } EXPORT_SYMBOL(pn533_recv_frame); /* Split the Tx skb into small chunks */ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) { struct sk_buff *frag; int frag_size; do { /* Remaining size */ if (skb->len > PN533_CMD_DATAFRAME_MAXLEN) frag_size = PN533_CMD_DATAFRAME_MAXLEN; else frag_size = skb->len; /* Allocate and reserve */ frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); return -ENOMEM; } if (!dev->tgt_mode) { /* Reserve the TG/MI byte */ skb_reserve(frag, 1); /* MI + TG */ if (frag_size == PN533_CMD_DATAFRAME_MAXLEN) *(u8 *)skb_push(frag, sizeof(u8)) = (PN533_CMD_MI_MASK | 1); else *(u8 *)skb_push(frag, sizeof(u8)) = 1; /* TG */ } skb_put_data(frag, skb->data, frag_size); /* Reduce the size of incoming buffer */ skb_pull(skb, frag_size); /* Add this to skb_queue */ skb_queue_tail(&dev->fragment_skb, frag); } while (skb->len > 0); dev_kfree_skb(skb); return skb_queue_len(&dev->fragment_skb); } static int pn533_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_data_exchange_arg *arg = NULL; int rc; if (!dev->tgt_active_prot) { nfc_err(dev->dev, "Can't exchange data if there is no active target\n"); rc = -EINVAL; goto error; } arg = kmalloc(sizeof(*arg), GFP_KERNEL); if (!arg) { rc = -ENOMEM; goto error; } arg->cb = cb; arg->cb_context = cb_context; switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot == NFC_PROTO_FELICA) { rc = pn533_send_data_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, arg); break; } fallthrough; default: /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); if (rc < 0) goto error; skb = skb_dequeue(&dev->fragment_skb); if (!skb) { rc = -EIO; goto error; } } else { *(u8 *)skb_push(skb, sizeof(u8)) = 1; /* TG */ } rc = pn533_send_data_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, arg); break; } if (rc < 0) /* rc from send_async */ goto error; return 0; error: kfree(arg); dev_kfree_skb(skb); return rc; } static int pn533_tm_send_complete(struct pn533 *dev, void *arg, struct sk_buff *resp) { u8 status; if (IS_ERR(resp)) return PTR_ERR(resp); status = resp->data[0]; /* Prepare for the next round */ if (skb_queue_len(&dev->fragment_skb) > 0) { queue_work(dev->wq, &dev->mi_tm_tx_work); return -EINPROGRESS; } dev_kfree_skb(resp); if (status != 0) { nfc_tm_deactivated(dev->nfc_dev); dev->tgt_mode = 0; return 0; } queue_work(dev->wq, &dev->tg_work); return 0; } static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); if (rc < 0) goto error; /* get the first skb */ skb = skb_dequeue(&dev->fragment_skb); if (!skb) { rc = -EIO; goto error; } rc = pn533_send_data_async(dev, PN533_CMD_TG_SET_META_DATA, skb, pn533_tm_send_complete, NULL); } else { /* Send th skb */ rc = pn533_send_data_async(dev, PN533_CMD_TG_SET_DATA, skb, pn533_tm_send_complete, NULL); } error: if (rc < 0) { dev_kfree_skb(skb); skb_queue_purge(&dev->fragment_skb); } return rc; } static void pn533_wq_mi_recv(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_rx_work); struct sk_buff *skb; int rc; skb = pn533_alloc_skb(dev, PN533_CMD_DATAEXCH_HEAD_LEN); if (!skb) goto error; switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot == NFC_PROTO_FELICA) { rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, dev->cmd_complete_mi_arg); break; } fallthrough; default: skb_put_u8(skb, 1); /*TG*/ rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, dev->cmd_complete_mi_arg); break; } if (rc == 0) /* success */ return; nfc_err(dev->dev, "Error %d when trying to perform data_exchange\n", rc); dev_kfree_skb(skb); kfree(dev->cmd_complete_mi_arg); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static void pn533_wq_mi_send(struct work_struct *work) { struct pn533 *dev = container_of(work, struct pn533, mi_tx_work); struct sk_buff *skb; int rc; /* Grab the first skb in the queue */ skb = skb_dequeue(&dev->fragment_skb); if (skb == NULL) { /* No more data */ /* Reset the queue for future use */ skb_queue_head_init(&dev->fragment_skb); goto error; } switch (dev->device_type) { case PN533_DEVICE_PASORI: if (dev->tgt_active_prot != NFC_PROTO_FELICA) { rc = -EIO; break; } rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_COMM_THRU, skb, pn533_data_exchange_complete, dev->cmd_complete_dep_arg); break; default: /* Still some fragments? */ rc = pn533_send_cmd_direct_async(dev, PN533_CMD_IN_DATA_EXCHANGE, skb, pn533_data_exchange_complete, dev->cmd_complete_dep_arg); break; } if (rc == 0) /* success */ return; nfc_err(dev->dev, "Error %d when trying to perform data_exchange\n", rc); dev_kfree_skb(skb); kfree(dev->cmd_complete_dep_arg); error: dev->phy_ops->send_ack(dev, GFP_KERNEL); queue_work(dev->wq, &dev->cmd_work); } static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata, u8 cfgdata_len) { struct sk_buff *skb; struct sk_buff *resp; int skb_len; skb_len = sizeof(cfgitem) + cfgdata_len; /* cfgitem + cfgdata */ skb = pn533_alloc_skb(dev, skb_len); if (!skb) return -ENOMEM; skb_put_u8(skb, cfgitem); skb_put_data(skb, cfgdata, cfgdata_len); resp = pn533_send_cmd_sync(dev, PN533_CMD_RF_CONFIGURATION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_get_firmware_version(struct pn533 *dev, struct pn533_fw_version *fv) { struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, 0); if (!skb) return -ENOMEM; resp = pn533_send_cmd_sync(dev, PN533_CMD_GET_FIRMWARE_VERSION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); fv->ic = resp->data[0]; fv->ver = resp->data[1]; fv->rev = resp->data[2]; fv->support = resp->data[3]; dev_kfree_skb(resp); return 0; } static int pn533_pasori_fw_reset(struct pn533 *dev) { struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, sizeof(u8)); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x1); resp = pn533_send_cmd_sync(dev, 0x18, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_rf_field(struct nfc_dev *nfc_dev, u8 rf) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); u8 rf_field = !!rf; int rc; rf_field |= PN533_CFGITEM_RF_FIELD_AUTO_RFCA; rc = pn533_set_configuration(dev, PN533_CFGITEM_RF_FIELD, (u8 *)&rf_field, 1); if (rc) { nfc_err(dev->dev, "Error on setting RF field\n"); return rc; } return 0; } static int pn532_sam_configuration(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct sk_buff *skb; struct sk_buff *resp; skb = pn533_alloc_skb(dev, 1); if (!skb) return -ENOMEM; skb_put_u8(skb, 0x01); resp = pn533_send_cmd_sync(dev, PN533_CMD_SAM_CONFIGURATION, skb); if (IS_ERR(resp)) return PTR_ERR(resp); dev_kfree_skb(resp); return 0; } static int pn533_dev_up(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int rc; if (dev->phy_ops->dev_up) { rc = dev->phy_ops->dev_up(dev); if (rc) return rc; } if ((dev->device_type == PN533_DEVICE_PN532) || (dev->device_type == PN533_DEVICE_PN532_AUTOPOLL)) { rc = pn532_sam_configuration(nfc_dev); if (rc) return rc; } return pn533_rf_field(nfc_dev, 1); } static int pn533_dev_down(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); int ret; ret = pn533_rf_field(nfc_dev, 0); if (dev->phy_ops->dev_down && !ret) ret = dev->phy_ops->dev_down(dev); return ret; } static const struct nfc_ops pn533_nfc_ops = { .dev_up = pn533_dev_up, .dev_down = pn533_dev_down, .dep_link_up = pn533_dep_link_up, .dep_link_down = pn533_dep_link_down, .start_poll = pn533_start_poll, .stop_poll = pn533_stop_poll, .activate_target = pn533_activate_target, .deactivate_target = pn533_deactivate_target, .im_transceive = pn533_transceive, .tm_send = pn533_tm_send, }; static int pn533_setup(struct pn533 *dev) { struct pn533_config_max_retries max_retries; struct pn533_config_timing timing; u8 pasori_cfg[3] = {0x08, 0x01, 0x08}; int rc; switch (dev->device_type) { case PN533_DEVICE_STD: case PN533_DEVICE_PASORI: case PN533_DEVICE_ACR122U: case PN533_DEVICE_PN532: case PN533_DEVICE_PN532_AUTOPOLL: max_retries.mx_rty_atr = 0x2; max_retries.mx_rty_psl = 0x1; max_retries.mx_rty_passive_act = PN533_CONFIG_MAX_RETRIES_NO_RETRY; timing.rfu = PN533_CONFIG_TIMING_102; timing.atr_res_timeout = PN533_CONFIG_TIMING_102; timing.dep_timeout = PN533_CONFIG_TIMING_204; break; default: nfc_err(dev->dev, "Unknown device type %d\n", dev->device_type); return -EINVAL; } rc = pn533_set_configuration(dev, PN533_CFGITEM_MAX_RETRIES, (u8 *)&max_retries, sizeof(max_retries)); if (rc) { nfc_err(dev->dev, "Error on setting MAX_RETRIES config\n"); return rc; } rc = pn533_set_configuration(dev, PN533_CFGITEM_TIMING, (u8 *)&timing, sizeof(timing)); if (rc) { nfc_err(dev->dev, "Error on setting RF timings\n"); return rc; } switch (dev->device_type) { case PN533_DEVICE_STD: case PN533_DEVICE_PN532: case PN533_DEVICE_PN532_AUTOPOLL: break; case PN533_DEVICE_PASORI: pn533_pasori_fw_reset(dev); rc = pn533_set_configuration(dev, PN533_CFGITEM_PASORI, pasori_cfg, 3); if (rc) { nfc_err(dev->dev, "Error while settings PASORI config\n"); return rc; } pn533_pasori_fw_reset(dev); break; } return 0; } int pn533_finalize_setup(struct pn533 *dev) { struct pn533_fw_version fw_ver; int rc; memset(&fw_ver, 0, sizeof(fw_ver)); rc = pn533_get_firmware_version(dev, &fw_ver); if (rc) { nfc_err(dev->dev, "Unable to get FW version\n"); return rc; } nfc_info(dev->dev, "NXP PN5%02X firmware ver %d.%d now attached\n", fw_ver.ic, fw_ver.ver, fw_ver.rev); rc = pn533_setup(dev); if (rc) return rc; return 0; } EXPORT_SYMBOL_GPL(pn533_finalize_setup); struct pn533 *pn53x_common_init(u32 device_type, enum pn533_protocol_type protocol_type, void *phy, const struct pn533_phy_ops *phy_ops, struct pn533_frame_ops *fops, struct device *dev) { struct pn533 *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->phy = phy; priv->phy_ops = phy_ops; priv->dev = dev; if (fops != NULL) priv->ops = fops; else priv->ops = &pn533_std_frame_ops; priv->protocol_type = protocol_type; priv->device_type = device_type; mutex_init(&priv->cmd_lock); INIT_WORK(&priv->cmd_work, pn533_wq_cmd); INIT_WORK(&priv->cmd_complete_work, pn533_wq_cmd_complete); INIT_WORK(&priv->mi_rx_work, pn533_wq_mi_recv); INIT_WORK(&priv->mi_tx_work, pn533_wq_mi_send); INIT_WORK(&priv->tg_work, pn533_wq_tg_get_data); INIT_WORK(&priv->mi_tm_rx_work, pn533_wq_tm_mi_recv); INIT_WORK(&priv->mi_tm_tx_work, pn533_wq_tm_mi_send); INIT_DELAYED_WORK(&priv->poll_work, pn533_wq_poll); INIT_WORK(&priv->rf_work, pn533_wq_rf); priv->wq = alloc_ordered_workqueue("pn533", 0); if (priv->wq == NULL) goto error; timer_setup(&priv->listen_timer, pn533_listen_mode_timer, 0); skb_queue_head_init(&priv->resp_q); skb_queue_head_init(&priv->fragment_skb); INIT_LIST_HEAD(&priv->cmd_queue); return priv; error: kfree(priv); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(pn53x_common_init); void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; /* delete the timer before cleanup the worker */ timer_shutdown_sync(&priv->listen_timer); flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); skb_queue_purge(&priv->resp_q); list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) { list_del(&cmd->queue); kfree(cmd); } kfree(priv); } EXPORT_SYMBOL_GPL(pn53x_common_clean); int pn532_i2c_nfc_alloc(struct pn533 *priv, u32 protocols, struct device *parent) { priv->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, priv->ops->tx_header_len + PN533_CMD_DATAEXCH_HEAD_LEN, priv->ops->tx_tail_len); if (!priv->nfc_dev) return -ENOMEM; nfc_set_parent_dev(priv->nfc_dev, parent); nfc_set_drvdata(priv->nfc_dev, priv); return 0; } EXPORT_SYMBOL_GPL(pn532_i2c_nfc_alloc); int pn53x_register_nfc(struct pn533 *priv, u32 protocols, struct device *parent) { int rc; rc = pn532_i2c_nfc_alloc(priv, protocols, parent); if (rc) return rc; rc = nfc_register_device(priv->nfc_dev); if (rc) nfc_free_device(priv->nfc_dev); return rc; } EXPORT_SYMBOL_GPL(pn53x_register_nfc); void pn53x_unregister_nfc(struct pn533 *priv) { nfc_unregister_device(priv->nfc_dev); nfc_free_device(priv->nfc_dev); } EXPORT_SYMBOL_GPL(pn53x_unregister_nfc); MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>"); MODULE_AUTHOR("Aloisio Almeida Jr <aloisio.almeida@openbossa.org>"); MODULE_AUTHOR("Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>"); MODULE_DESCRIPTION("PN533 driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL");
3 3 3 1 3 1 1 1 1 1 6 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 // SPDX-License-Identifier: GPL-2.0-only /* Common methods for dibusb-based-receivers. * * Copyright (C) 2004-5 Patrick Boettcher (patrick.boettcher@posteo.de) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dibusb.h" /* Max transfer size done by I2C transfer functions */ #define MAX_XFER_SIZE 64 static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=info (|-able))." DVB_USB_DEBUG_STATUS); MODULE_DESCRIPTION("Common methods for dibusb-based receivers"); MODULE_LICENSE("GPL"); #define deb_info(args...) dprintk(debug,0x01,args) /* common stuff used by the different dibusb modules */ int dibusb_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { if (adap->priv != NULL) { struct dibusb_state *st = adap->priv; if (st->ops.fifo_ctrl != NULL) if (st->ops.fifo_ctrl(adap->fe_adap[0].fe, onoff)) { err("error while controlling the fifo of the demod."); return -ENODEV; } } return 0; } EXPORT_SYMBOL(dibusb_streaming_ctrl); int dibusb_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { if (adap->priv != NULL) { struct dibusb_state *st = adap->priv; if (st->ops.pid_ctrl != NULL) st->ops.pid_ctrl(adap->fe_adap[0].fe, index, pid, onoff); } return 0; } EXPORT_SYMBOL(dibusb_pid_filter); int dibusb_pid_filter_ctrl(struct dvb_usb_adapter *adap, int onoff) { if (adap->priv != NULL) { struct dibusb_state *st = adap->priv; if (st->ops.pid_parse != NULL) if (st->ops.pid_parse(adap->fe_adap[0].fe, onoff) < 0) err("could not handle pid_parser"); } return 0; } EXPORT_SYMBOL(dibusb_pid_filter_ctrl); int dibusb_power_ctrl(struct dvb_usb_device *d, int onoff) { u8 *b; int ret; b = kmalloc(3, GFP_KERNEL); if (!b) return -ENOMEM; b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = DIBUSB_IOCTL_CMD_POWER_MODE; b[2] = onoff ? DIBUSB_IOCTL_POWER_WAKEUP : DIBUSB_IOCTL_POWER_SLEEP; ret = dvb_usb_generic_write(d, b, 3); kfree(b); msleep(10); return ret; } EXPORT_SYMBOL(dibusb_power_ctrl); int dibusb2_0_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { int ret; u8 *b; b = kmalloc(3, GFP_KERNEL); if (!b) return -ENOMEM; if ((ret = dibusb_streaming_ctrl(adap,onoff)) < 0) goto ret; if (onoff) { b[0] = DIBUSB_REQ_SET_STREAMING_MODE; b[1] = 0x00; ret = dvb_usb_generic_write(adap->dev, b, 2); if (ret < 0) goto ret; } b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM; ret = dvb_usb_generic_write(adap->dev, b, 3); ret: kfree(b); return ret; } EXPORT_SYMBOL(dibusb2_0_streaming_ctrl); int dibusb2_0_power_ctrl(struct dvb_usb_device *d, int onoff) { u8 *b; int ret; if (!onoff) return 0; b = kmalloc(3, GFP_KERNEL); if (!b) return -ENOMEM; b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = DIBUSB_IOCTL_CMD_POWER_MODE; b[2] = DIBUSB_IOCTL_POWER_WAKEUP; ret = dvb_usb_generic_write(d, b, 3); kfree(b); return ret; } EXPORT_SYMBOL(dibusb2_0_power_ctrl); static int dibusb_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen) { u8 *sndbuf; int ret, wo, len; /* write only ? */ wo = (rbuf == NULL || rlen == 0); len = 2 + wlen + (wo ? 0 : 2); sndbuf = kmalloc(MAX_XFER_SIZE, GFP_KERNEL); if (!sndbuf) return -ENOMEM; if (4 + wlen > MAX_XFER_SIZE) { warn("i2c wr: len=%d is too big!\n", wlen); ret = -EOPNOTSUPP; goto ret; } sndbuf[0] = wo ? DIBUSB_REQ_I2C_WRITE : DIBUSB_REQ_I2C_READ; sndbuf[1] = (addr << 1) | (wo ? 0 : 1); memcpy(&sndbuf[2], wbuf, wlen); if (!wo) { sndbuf[wlen + 2] = (rlen >> 8) & 0xff; sndbuf[wlen + 3] = rlen & 0xff; } ret = dvb_usb_generic_rw(d, sndbuf, len, rbuf, rlen, 0); ret: kfree(sndbuf); return ret; } /* * I2C master xfer function */ static int dibusb_i2c_xfer(struct i2c_adapter *adap,struct i2c_msg msg[],int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); int i; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; for (i = 0; i < num; i++) { /* write/read request */ if (i+1 < num && (msg[i].flags & I2C_M_RD) == 0 && (msg[i+1].flags & I2C_M_RD)) { if (dibusb_i2c_msg(d, msg[i].addr, msg[i].buf,msg[i].len, msg[i+1].buf,msg[i+1].len) < 0) break; i++; } else if ((msg[i].flags & I2C_M_RD) == 0) { if (dibusb_i2c_msg(d, msg[i].addr, msg[i].buf,msg[i].len,NULL,0) < 0) break; } else if (msg[i].addr != 0x50) { /* 0x50 is the address of the eeprom - we need to protect it * from dibusb's bad i2c implementation: reads without * writing the offset before are forbidden */ if (dibusb_i2c_msg(d, msg[i].addr, NULL, 0, msg[i].buf, msg[i].len) < 0) break; } } mutex_unlock(&d->i2c_mutex); return i; } static u32 dibusb_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } struct i2c_algorithm dibusb_i2c_algo = { .master_xfer = dibusb_i2c_xfer, .functionality = dibusb_i2c_func, }; EXPORT_SYMBOL(dibusb_i2c_algo); int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val) { u8 *buf; int rc; buf = kzalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = offs; rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1); *val = buf[1]; kfree(buf); return rc; } EXPORT_SYMBOL(dibusb_read_eeprom_byte); /* * common remote control stuff */ struct rc_map_table rc_map_dibusb_table[] = { /* Key codes for the little Artec T1/Twinhan/HAMA/ remote. */ { 0x0016, KEY_POWER }, { 0x0010, KEY_MUTE }, { 0x0003, KEY_1 }, { 0x0001, KEY_2 }, { 0x0006, KEY_3 }, { 0x0009, KEY_4 }, { 0x001d, KEY_5 }, { 0x001f, KEY_6 }, { 0x000d, KEY_7 }, { 0x0019, KEY_8 }, { 0x001b, KEY_9 }, { 0x0015, KEY_0 }, { 0x0005, KEY_CHANNELUP }, { 0x0002, KEY_CHANNELDOWN }, { 0x001e, KEY_VOLUMEUP }, { 0x000a, KEY_VOLUMEDOWN }, { 0x0011, KEY_RECORD }, { 0x0017, KEY_FAVORITES }, /* Heart symbol - Channel list. */ { 0x0014, KEY_PLAY }, { 0x001a, KEY_STOP }, { 0x0040, KEY_REWIND }, { 0x0012, KEY_FASTFORWARD }, { 0x000e, KEY_PREVIOUS }, /* Recall - Previous channel. */ { 0x004c, KEY_PAUSE }, { 0x004d, KEY_SCREEN }, /* Full screen mode. */ { 0x0054, KEY_AUDIO }, /* MTS - Switch to secondary audio. */ /* additional keys TwinHan VisionPlus, the Artec seemingly not have */ { 0x000c, KEY_CANCEL }, /* Cancel */ { 0x001c, KEY_EPG }, /* EPG */ { 0x0000, KEY_TAB }, /* Tab */ { 0x0048, KEY_INFO }, /* Preview */ { 0x0004, KEY_LIST }, /* RecordList */ { 0x000f, KEY_TEXT }, /* Teletext */ /* Key codes for the KWorld/ADSTech/JetWay remote. */ { 0x8612, KEY_POWER }, { 0x860f, KEY_SELECT }, /* source */ { 0x860c, KEY_UNKNOWN }, /* scan */ { 0x860b, KEY_EPG }, { 0x8610, KEY_MUTE }, { 0x8601, KEY_1 }, { 0x8602, KEY_2 }, { 0x8603, KEY_3 }, { 0x8604, KEY_4 }, { 0x8605, KEY_5 }, { 0x8606, KEY_6 }, { 0x8607, KEY_7 }, { 0x8608, KEY_8 }, { 0x8609, KEY_9 }, { 0x860a, KEY_0 }, { 0x8618, KEY_ZOOM }, { 0x861c, KEY_UNKNOWN }, /* preview */ { 0x8613, KEY_UNKNOWN }, /* snap */ { 0x8600, KEY_UNDO }, { 0x861d, KEY_RECORD }, { 0x860d, KEY_STOP }, { 0x860e, KEY_PAUSE }, { 0x8616, KEY_PLAY }, { 0x8611, KEY_BACK }, { 0x8619, KEY_FORWARD }, { 0x8614, KEY_UNKNOWN }, /* pip */ { 0x8615, KEY_ESC }, { 0x861a, KEY_UP }, { 0x861e, KEY_DOWN }, { 0x861f, KEY_LEFT }, { 0x861b, KEY_RIGHT }, /* Key codes for the DiBcom MOD3000 remote. */ { 0x8000, KEY_MUTE }, { 0x8001, KEY_TEXT }, { 0x8002, KEY_HOME }, { 0x8003, KEY_POWER }, { 0x8004, KEY_RED }, { 0x8005, KEY_GREEN }, { 0x8006, KEY_YELLOW }, { 0x8007, KEY_BLUE }, { 0x8008, KEY_DVD }, { 0x8009, KEY_AUDIO }, { 0x800a, KEY_IMAGES }, /* Pictures */ { 0x800b, KEY_VIDEO }, { 0x800c, KEY_BACK }, { 0x800d, KEY_UP }, { 0x800e, KEY_RADIO }, { 0x800f, KEY_EPG }, { 0x8010, KEY_LEFT }, { 0x8011, KEY_OK }, { 0x8012, KEY_RIGHT }, { 0x8013, KEY_UNKNOWN }, /* SAP */ { 0x8014, KEY_TV }, { 0x8015, KEY_DOWN }, { 0x8016, KEY_MENU }, /* DVD Menu */ { 0x8017, KEY_LAST }, { 0x8018, KEY_RECORD }, { 0x8019, KEY_STOP }, { 0x801a, KEY_PAUSE }, { 0x801b, KEY_PLAY }, { 0x801c, KEY_PREVIOUS }, { 0x801d, KEY_REWIND }, { 0x801e, KEY_FASTFORWARD }, { 0x801f, KEY_NEXT}, { 0x8040, KEY_1 }, { 0x8041, KEY_2 }, { 0x8042, KEY_3 }, { 0x8043, KEY_CHANNELUP }, { 0x8044, KEY_4 }, { 0x8045, KEY_5 }, { 0x8046, KEY_6 }, { 0x8047, KEY_CHANNELDOWN }, { 0x8048, KEY_7 }, { 0x8049, KEY_8 }, { 0x804a, KEY_9 }, { 0x804b, KEY_VOLUMEUP }, { 0x804c, KEY_CLEAR }, { 0x804d, KEY_0 }, { 0x804e, KEY_ENTER }, { 0x804f, KEY_VOLUMEDOWN }, }; EXPORT_SYMBOL(rc_map_dibusb_table); int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { u8 *buf; int ret; buf = kmalloc(5, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = DIBUSB_REQ_POLL_REMOTE; ret = dvb_usb_generic_rw(d, buf, 1, buf, 5, 0); if (ret < 0) goto ret; dvb_usb_nec_rc_key_to_event(d, buf, event, state); if (buf[0] != 0) deb_info("key: %*ph\n", 5, buf); ret: kfree(buf); return ret; } EXPORT_SYMBOL(dibusb_rc_query);
124 124 1 1 1 1 1 21 2 21 3 152 228 8 186 38 181 121 2 3 3 1 2 181 38 181 191 192 193 66 152 181 33 181 66 121 37 119 131 129 131 10 17 121 16 107 61 61 60 5 58 58 58 29 29 29 4 27 28 9 9 9 1 9 5 5 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 // SPDX-License-Identifier: GPL-2.0 #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/netpoll.h> #include <linux/export.h> #include <net/gro.h> #include "vlan.h" bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); if (!vlan_dev) return false; skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return false; if (unlikely(!(vlan_dev->flags & IFF_UP))) { kfree_skb(skb); *skbp = NULL; return false; } skb->dev = vlan_dev; if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) { /* Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) && !netif_is_macvlan_port(vlan_dev) && !netif_is_bridge_port(vlan_dev)) { unsigned int offset = skb->data - skb_mac_header(skb); /* * vlan_insert_tag expect skb->data pointing to mac header. * So change skb->data before calling it and change back to * original position later */ skb_push(skb, offset); skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto, skb->vlan_tci, skb->mac_len); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); skb_reset_mac_len(skb); } skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); __vlan_hwaccel_clear_tag(skb); rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); u64_stats_inc(&rx_stats->rx_packets); u64_stats_add(&rx_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) u64_stats_inc(&rx_stats->rx_multicast); u64_stats_update_end(&rx_stats->syncp); return true; } /* Must be invoked with rcu_read_lock. */ struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { return vlan_group_get_device(&vlan_info->grp, vlan_proto, vlan_id); } else { /* * Lower devices of master uppers (bonding, team) do not have * grp assigned to themselves. Grp is assigned to upper device * instead. */ struct net_device *upper_dev; upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { struct net_device *ret = vlan_dev_priv(dev)->real_dev; while (is_vlan_dev(ret)) ret = vlan_dev_priv(ret)->real_dev; return ret; } EXPORT_SYMBOL(vlan_dev_real_dev); u16 vlan_dev_vlan_id(const struct net_device *dev) { return vlan_dev_priv(dev)->vlan_id; } EXPORT_SYMBOL(vlan_dev_vlan_id); __be16 vlan_dev_vlan_proto(const struct net_device *dev) { return vlan_dev_priv(dev)->vlan_proto; } EXPORT_SYMBOL(vlan_dev_vlan_proto); /* * vlan info and vid list */ static void vlan_group_free(struct vlan_group *grp) { int i, j; for (i = 0; i < VLAN_PROTO_NUM; i++) for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++) kfree(grp->vlan_devices_arrays[i][j]); } static void vlan_info_free(struct vlan_info *vlan_info) { vlan_group_free(&vlan_info->grp); kfree(vlan_info); } static void vlan_info_rcu_free(struct rcu_head *rcu) { vlan_info_free(container_of(rcu, struct vlan_info, rcu)); } static struct vlan_info *vlan_info_alloc(struct net_device *dev) { struct vlan_info *vlan_info; vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL); if (!vlan_info) return NULL; vlan_info->real_dev = dev; INIT_LIST_HEAD(&vlan_info->vid_list); return vlan_info; } struct vlan_vid_info { struct list_head list; __be16 proto; u16 vid; int refcount; }; static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto) { if (proto == htons(ETH_P_8021Q) && dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) return true; if (proto == htons(ETH_P_8021AD) && dev->features & NETIF_F_HW_VLAN_STAG_FILTER) return true; return false; } static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, __be16 proto, u16 vid) { struct vlan_vid_info *vid_info; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (vid_info->proto == proto && vid_info->vid == vid) return vid_info; } return NULL; } static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid) { struct vlan_vid_info *vid_info; vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); if (!vid_info) return NULL; vid_info->proto = proto; vid_info->vid = vid; return vid_info; } static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid) { if (!vlan_hw_filter_capable(dev, proto)) return 0; if (netif_device_present(dev)) return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid); else return -ENODEV; } static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid) { if (!vlan_hw_filter_capable(dev, proto)) return 0; if (netif_device_present(dev)) return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid); else return -ENODEV; } int vlan_for_each(struct net_device *dev, int (*action)(struct net_device *dev, int vid, void *arg), void *arg) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; struct net_device *vdev; int ret; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto, vid_info->vid); ret = action(vdev, vid_info->vid, arg); if (ret) return ret; } return 0; } EXPORT_SYMBOL(vlan_for_each); int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto) { struct net_device *real_dev = vlan_info->real_dev; struct vlan_vid_info *vlan_vid_info; int err; list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) { if (vlan_vid_info->proto == proto) { err = vlan_add_rx_filter_info(real_dev, proto, vlan_vid_info->vid); if (err) goto unwind; } } return 0; unwind: list_for_each_entry_continue_reverse(vlan_vid_info, &vlan_info->vid_list, list) { if (vlan_vid_info->proto == proto) vlan_kill_rx_filter_info(real_dev, proto, vlan_vid_info->vid); } return err; } EXPORT_SYMBOL(vlan_filter_push_vids); void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto) { struct vlan_vid_info *vlan_vid_info; list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) if (vlan_vid_info->proto == proto) vlan_kill_rx_filter_info(vlan_info->real_dev, vlan_vid_info->proto, vlan_vid_info->vid); } EXPORT_SYMBOL(vlan_filter_drop_vids); static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, struct vlan_vid_info **pvid_info) { struct net_device *dev = vlan_info->real_dev; struct vlan_vid_info *vid_info; int err; vid_info = vlan_vid_info_alloc(proto, vid); if (!vid_info) return -ENOMEM; err = vlan_add_rx_filter_info(dev, proto, vid); if (err) { kfree(vid_info); return err; } list_add(&vid_info->list, &vlan_info->vid_list); vlan_info->nr_vids++; *pvid_info = vid_info; return 0; } int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; bool vlan_info_created = false; int err; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) { vlan_info = vlan_info_alloc(dev); if (!vlan_info) return -ENOMEM; vlan_info_created = true; } vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) { err = __vlan_vid_add(vlan_info, proto, vid, &vid_info); if (err) goto out_free_vlan_info; } vid_info->refcount++; if (vlan_info_created) rcu_assign_pointer(dev->vlan_info, vlan_info); return 0; out_free_vlan_info: if (vlan_info_created) kfree(vlan_info); return err; } EXPORT_SYMBOL(vlan_vid_add); static void __vlan_vid_del(struct vlan_info *vlan_info, struct vlan_vid_info *vid_info) { struct net_device *dev = vlan_info->real_dev; __be16 proto = vid_info->proto; u16 vid = vid_info->vid; int err; err = vlan_kill_rx_filter_info(dev, proto, vid); if (err && dev->reg_state != NETREG_UNREGISTERING) netdev_warn(dev, "failed to kill vid %04x/%d\n", proto, vid); list_del(&vid_info->list); kfree(vid_info); vlan_info->nr_vids--; } void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return; vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) return; vid_info->refcount--; if (vid_info->refcount == 0) { __vlan_vid_del(vlan_info, vid_info); if (vlan_info->nr_vids == 0) { RCU_INIT_POINTER(dev->vlan_info, NULL); call_rcu(&vlan_info->rcu, vlan_info_rcu_free); } } } EXPORT_SYMBOL(vlan_vid_del); int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; int err; ASSERT_RTNL(); vlan_info = rtnl_dereference(by_dev->vlan_info); if (!vlan_info) return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; } return 0; unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } return err; } EXPORT_SYMBOL(vlan_vids_add_by_dev); void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(by_dev->vlan_info); if (!vlan_info) return; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } } EXPORT_SYMBOL(vlan_vids_del_by_dev); bool vlan_uses_dev(const struct net_device *dev) { struct vlan_info *vlan_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return false; return vlan_info->grp.nr_vlan_devs ? true : false; } EXPORT_SYMBOL(vlan_uses_dev); static struct sk_buff *vlan_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct packet_offload *ptype; unsigned int hlen, off_vlan; struct sk_buff *pp = NULL; struct vlan_hdr *vhdr; struct sk_buff *p; __be16 type; int flush = 1; off_vlan = skb_gro_offset(skb); hlen = off_vlan + sizeof(*vhdr); vhdr = skb_gro_header(skb, hlen, off_vlan); if (unlikely(!vhdr)) goto out; NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); if (!ptype) goto out; flush = 0; list_for_each_entry(p, head, list) { struct vlan_hdr *vhdr2; if (!NAPI_GRO_CB(p)->same_flow) continue; vhdr2 = (struct vlan_hdr *)(p->data + off_vlan); if (compare_vlan_header(vhdr, vhdr2)) NAPI_GRO_CB(p)->same_flow = 0; } skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } static int vlan_gro_complete(struct sk_buff *skb, int nhoff) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff); __be16 type = vhdr->h_vlan_encapsulated_proto; struct packet_offload *ptype; int err = -ENOENT; ptype = gro_find_complete_by_type(type); if (ptype) err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, nhoff + sizeof(*vhdr)); return err; } static struct packet_offload vlan_packet_offloads[] __read_mostly = { { .type = cpu_to_be16(ETH_P_8021Q), .priority = 10, .callbacks = { .gro_receive = vlan_gro_receive, .gro_complete = vlan_gro_complete, }, }, { .type = cpu_to_be16(ETH_P_8021AD), .priority = 10, .callbacks = { .gro_receive = vlan_gro_receive, .gro_complete = vlan_gro_complete, }, }, }; static int __init vlan_offload_init(void) { unsigned int i; for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) dev_add_offload(&vlan_packet_offloads[i]); return 0; } fs_initcall(vlan_offload_init);
30 30 171 171 272 273 82 82 82 332 333 1 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 // SPDX-License-Identifier: GPL-2.0 /* * dax: direct host memory access * Copyright (C) 2020 Red Hat, Inc. */ #include "fuse_i.h" #include <linux/delay.h> #include <linux/dax.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/pfn_t.h> #include <linux/iomap.h> #include <linux/interval_tree.h> /* * Default memory range size. A power of 2 so it agrees with common FUSE_INIT * map_alignment values 4KB and 64KB. */ #define FUSE_DAX_SHIFT 21 #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) /* Number of ranges reclaimer will try to free in one invocation */ #define FUSE_DAX_RECLAIM_CHUNK (10) /* * Dax memory reclaim threshold in percetage of total ranges. When free * number of free ranges drops below this threshold, reclaim can trigger * Default is 20% */ #define FUSE_DAX_RECLAIM_THRESHOLD (20) /** Translation information for file offsets to DAX window offsets */ struct fuse_dax_mapping { /* Pointer to inode where this memory range is mapped */ struct inode *inode; /* Will connect in fcd->free_ranges to keep track of free memory */ struct list_head list; /* For interval tree in file/inode */ struct interval_tree_node itn; /* Will connect in fc->busy_ranges to keep track busy memory */ struct list_head busy_list; /** Position in DAX window */ u64 window_offset; /** Length of mapping, in bytes */ loff_t length; /* Is this mapping read-only or read-write */ bool writable; /* reference count when the mapping is used by dax iomap. */ refcount_t refcnt; }; /* Per-inode dax map */ struct fuse_inode_dax { /* Semaphore to protect modifications to the dmap tree */ struct rw_semaphore sem; /* Sorted rb tree of struct fuse_dax_mapping elements */ struct rb_root_cached tree; unsigned long nr; }; struct fuse_conn_dax { /* DAX device */ struct dax_device *dev; /* Lock protecting accessess to members of this structure */ spinlock_t lock; /* List of memory ranges which are busy */ unsigned long nr_busy_ranges; struct list_head busy_ranges; /* Worker to free up memory ranges */ struct delayed_work free_work; /* Wait queue for a dax range to become free */ wait_queue_head_t range_waitq; /* DAX Window Free Ranges */ long nr_free_ranges; struct list_head free_ranges; unsigned long nr_ranges; }; static inline struct fuse_dax_mapping * node_to_dmap(struct interval_tree_node *node) { if (!node) return NULL; return container_of(node, struct fuse_dax_mapping, itn); } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); static void __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { unsigned long free_threshold; /* If number of free ranges are below threshold, start reclaim */ free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1); if (fcd->nr_free_ranges < free_threshold) queue_delayed_work(system_long_wq, &fcd->free_work, msecs_to_jiffies(delay_ms)); } static void kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { spin_lock(&fcd->lock); __kick_dmap_free_worker(fcd, delay_ms); spin_unlock(&fcd->lock); } static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) { struct fuse_dax_mapping *dmap; spin_lock(&fcd->lock); dmap = list_first_entry_or_null(&fcd->free_ranges, struct fuse_dax_mapping, list); if (dmap) { list_del_init(&dmap->list); WARN_ON(fcd->nr_free_ranges <= 0); fcd->nr_free_ranges--; } __kick_dmap_free_worker(fcd, 0); spin_unlock(&fcd->lock); return dmap; } /* This assumes fcd->lock is held */ static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_del_init(&dmap->busy_list); WARN_ON(fcd->nr_busy_ranges == 0); fcd->nr_busy_ranges--; } static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { spin_lock(&fcd->lock); __dmap_remove_busy_list(fcd, dmap); spin_unlock(&fcd->lock); } /* This assumes fcd->lock is held */ static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_add_tail(&dmap->list, &fcd->free_ranges); fcd->nr_free_ranges++; wake_up(&fcd->range_waitq); } static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { /* Return fuse_dax_mapping to free list */ spin_lock(&fcd->lock); __dmap_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); } static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, struct fuse_dax_mapping *dmap, bool writable, bool upgrade) { struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn_dax *fcd = fm->fc->dax; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_setupmapping_in inarg; loff_t offset = start_idx << FUSE_DAX_SHIFT; FUSE_ARGS(args); ssize_t err; WARN_ON(fcd->nr_free_ranges < 0); /* Ask fuse daemon to setup mapping */ memset(&inarg, 0, sizeof(inarg)); inarg.foffset = offset; inarg.fh = -1; inarg.moffset = dmap->window_offset; inarg.len = FUSE_DAX_SZ; inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; if (writable) inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; args.opcode = FUSE_SETUPMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; err = fuse_simple_request(fm, &args); if (err < 0) return err; dmap->writable = writable; if (!upgrade) { /* * We don't take a reference on inode. inode is valid right now * and when inode is going away, cleanup logic should first * cleanup dmap entries. */ dmap->inode = inode; dmap->itn.start = dmap->itn.last = start_idx; /* Protected by fi->dax->sem */ interval_tree_insert(&dmap->itn, &fi->dax->tree); fi->dax->nr++; spin_lock(&fcd->lock); list_add_tail(&dmap->busy_list, &fcd->busy_ranges); fcd->nr_busy_ranges++; spin_unlock(&fcd->lock); } return 0; } static int fuse_send_removemapping(struct inode *inode, struct fuse_removemapping_in *inargp, struct fuse_removemapping_one *remove_one) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); args.opcode = FUSE_REMOVEMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 2; args.in_args[0].size = sizeof(*inargp); args.in_args[0].value = inargp; args.in_args[1].size = inargp->count * sizeof(*remove_one); args.in_args[1].value = remove_one; return fuse_simple_request(fm, &args); } static int dmap_removemapping_list(struct inode *inode, unsigned int num, struct list_head *to_remove) { struct fuse_removemapping_one *remove_one, *ptr; struct fuse_removemapping_in inarg; struct fuse_dax_mapping *dmap; int ret, i = 0, nr_alloc; nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); if (!remove_one) return -ENOMEM; ptr = remove_one; list_for_each_entry(dmap, to_remove, list) { ptr->moffset = dmap->window_offset; ptr->len = dmap->length; ptr++; i++; num--; if (i >= nr_alloc || num == 0) { memset(&inarg, 0, sizeof(inarg)); inarg.count = i; ret = fuse_send_removemapping(inode, &inarg, remove_one); if (ret) goto out; ptr = remove_one; i = 0; } } out: kfree(remove_one); return ret; } /* * Cleanup dmap entry and add back to free list. This should be called with * fcd->lock held. */ static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", dmap->itn.start, dmap->itn.last, dmap->window_offset, dmap->length); __dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; __dmap_add_to_free_pool(fcd, dmap); } /* * Free inode dmap entries whose range falls inside [start, end]. * Does not take any locks. At this point of time it should only be * called from evict_inode() path where we know all dmap entries can be * reclaimed. */ static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, struct inode *inode, loff_t start, loff_t end) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap, *n; int err, num = 0; LIST_HEAD(to_remove); unsigned long start_idx = start >> FUSE_DAX_SHIFT; unsigned long end_idx = end >> FUSE_DAX_SHIFT; struct interval_tree_node *node; while (1) { node = interval_tree_iter_first(&fi->dax->tree, start_idx, end_idx); if (!node) break; dmap = node_to_dmap(node); /* inode is going away. There should not be any users of dmap */ WARN_ON(refcount_read(&dmap->refcnt) > 1); interval_tree_remove(&dmap->itn, &fi->dax->tree); num++; list_add(&dmap->list, &to_remove); } /* Nothing to remove */ if (list_empty(&to_remove)) return; WARN_ON(fi->dax->nr < num); fi->dax->nr -= num; err = dmap_removemapping_list(inode, num, &to_remove); if (err && err != -ENOTCONN) { pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", start, end); } spin_lock(&fcd->lock); list_for_each_entry_safe(dmap, n, &to_remove, list) { list_del_init(&dmap->list); dmap_reinit_add_to_free_pool(fcd, dmap); } spin_unlock(&fcd->lock); } static int dmap_removemapping_one(struct inode *inode, struct fuse_dax_mapping *dmap) { struct fuse_removemapping_one forget_one; struct fuse_removemapping_in inarg; memset(&inarg, 0, sizeof(inarg)); inarg.count = 1; memset(&forget_one, 0, sizeof(forget_one)); forget_one.moffset = dmap->window_offset; forget_one.len = dmap->length; return fuse_send_removemapping(inode, &inarg, &forget_one); } /* * It is called from evict_inode() and by that time inode is going away. So * this function does not take any locks like fi->dax->sem for traversing * that fuse inode interval tree. If that lock is taken then lock validator * complains of deadlock situation w.r.t fs_reclaim lock. */ void fuse_dax_inode_cleanup(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); /* * fuse_evict_inode() has already called truncate_inode_pages_final() * before we arrive here. So we should not have to worry about any * pages/exception entries still associated with inode. */ inode_reclaim_dmap_range(fc->dax, inode, 0, -1); WARN_ON(fi->dax->nr); } static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) { iomap->addr = IOMAP_NULL_ADDR; iomap->length = length; iomap->type = IOMAP_HOLE; } static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, struct iomap *iomap, struct fuse_dax_mapping *dmap, unsigned int flags) { loff_t offset, len; loff_t i_size = i_size_read(inode); offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); len = min(length, dmap->length - offset); /* If length is beyond end of file, truncate further */ if (pos + len > i_size) len = i_size - pos; if (len > 0) { iomap->addr = dmap->window_offset + offset; iomap->length = len; if (flags & IOMAP_FAULT) iomap->length = ALIGN(len, PAGE_SIZE); iomap->type = IOMAP_MAPPED; /* * increace refcnt so that reclaim code knows this dmap is in * use. This assumes fi->dax->sem mutex is held either * shared/exclusive. */ refcount_inc(&dmap->refcnt); /* iomap->private should be NULL */ WARN_ON_ONCE(iomap->private); iomap->private = dmap; } else { /* Mapping beyond end of file is hole */ fuse_fill_iomap_hole(iomap, length); } } static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; int ret; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Can't do inline reclaim in fault path. We call * dax_layout_busy_page() before we free a range. And * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it. * In fault path we enter with mapping->invalidate_lock held and can't * drop it. Also in fault path we hold mapping->invalidate_lock shared * and not exclusive, so that creates further issues with * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault() * will wait for a memory range to become free and retry. */ if (flags & IOMAP_FAULT) { alloc_dmap = alloc_dax_mapping(fcd); if (!alloc_dmap) return -EAGAIN; } else { alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); if (IS_ERR(alloc_dmap)) return PTR_ERR(alloc_dmap); } /* If we are here, we should have memory allocated */ if (WARN_ON(!alloc_dmap)) return -EIO; /* * Take write lock so that only one caller can try to setup mapping * and other waits. */ down_write(&fi->dax->sem); /* * We dropped lock. Check again if somebody else setup * mapping already. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return 0; } /* Setup one mapping */ ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, writable, false); if (ret < 0) { dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return ret; } fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); up_write(&fi->dax->sem); return 0; } static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; int ret; unsigned long idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Take exclusive lock so that only one caller can try to setup * mapping and others wait. */ down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, idx, idx); /* We are holding either inode lock or invalidate_lock, and that should * ensure that dmap can't be truncated. We are holding a reference * on dmap and that should make sure it can't be reclaimed. So dmap * should still be there in tree despite the fact we dropped and * re-acquired the fi->dax->sem lock. */ ret = -EIO; if (WARN_ON(!node)) goto out_err; dmap = node_to_dmap(node); /* We took an extra reference on dmap to make sure its not reclaimd. * Now we hold fi->dax->sem lock and that reference is not needed * anymore. Drop it. */ if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } /* Maybe another thread already upgraded mapping while we were not * holding lock. */ if (dmap->writable) { ret = 0; goto out_fill_iomap; } ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, true); if (ret < 0) goto out_err; out_fill_iomap: fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); out_err: up_write(&fi->dax->sem); return ret; } /* This is just for DAX and the mapping is ephemeral, do not use it for other * purposes since there is no block device with a permanent mapping. */ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* We don't support FIEMAP */ if (WARN_ON(flags & IOMAP_REPORT)) return -EIO; iomap->offset = pos; iomap->flags = 0; iomap->bdev = NULL; iomap->dax_dev = fc->dax->dev; /* * Both read/write and mmap path can race here. So we need something * to make sure if we are setting up mapping, then other path waits * * For now, use a semaphore for this. It probably needs to be * optimized later. */ down_read(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); if (writable && !dmap->writable) { /* Upgrade read-only mapping to read-write. This will * require exclusive fi->dax->sem lock as we don't want * two threads to be trying to this simultaneously * for same dmap. So drop shared lock and acquire * exclusive lock. * * Before dropping fi->dax->sem lock, take reference * on dmap so that its not freed by range reclaim. */ refcount_inc(&dmap->refcnt); up_read(&fi->dax->sem); pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); return fuse_upgrade_dax_mapping(inode, pos, length, flags, iomap); } else { fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); up_read(&fi->dax->sem); return 0; } } else { up_read(&fi->dax->sem); pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); if (pos >= i_size_read(inode)) goto iomap_hole; return fuse_setup_new_dax_mapping(inode, pos, length, flags, iomap); } /* * If read beyond end of file happens, fs code seems to return * it as hole */ iomap_hole: fuse_fill_iomap_hole(iomap, length); pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", __func__, pos, length, iomap->length); return 0; } static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { struct fuse_dax_mapping *dmap = iomap->private; if (dmap) { if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } } /* DAX writes beyond end-of-file aren't handled using iomap, so the * file size is unchanged and there is nothing to do here. */ return 0; } static const struct iomap_ops fuse_iomap_ops = { .iomap_begin = fuse_iomap_begin, .iomap_end = fuse_iomap_end, }; static void fuse_wait_dax_page(struct inode *inode) { filemap_invalidate_unlock(inode->i_mapping); schedule(); filemap_invalidate_lock(inode->i_mapping); } /* Should be called with mapping->invalidate_lock held exclusively */ static int __fuse_dax_break_layouts(struct inode *inode, bool *retry, loff_t start, loff_t end) { struct page *page; page = dax_layout_busy_page_range(inode->i_mapping, start, end); if (!page) return 0; *retry = true; return ___wait_var_event(&page->_refcount, atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 0, 0, fuse_wait_dax_page(inode)); } /* dmap_end == 0 leads to unmapping of whole file */ int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end) { bool retry; int ret; do { retry = false; ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, dmap_end); } while (ret == 0 && retry); return ret; } ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { inode_lock_shared(inode); } ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); inode_unlock_shared(inode); /* TODO file_accessed(iocb->f_filp) */ return ret; } static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); return (iov_iter_rw(from) == WRITE && ((iocb->ki_pos) >= i_size_read(inode) || (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); } static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) return -EAGAIN; } else { inode_lock(inode); } ret = generic_write_checks(iocb, from); if (ret <= 0) goto out; ret = file_remove_privs(iocb->ki_filp); if (ret) goto out; /* TODO file_update_time() but we don't want metadata I/O */ /* Do not use dax for file extending writes as write and on * disk i_size increase are not atomic otherwise. */ if (file_extending_write(iocb, from)) ret = fuse_dax_direct_write(iocb, from); else ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); out: inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } static int fuse_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc); } static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order, bool write) { vm_fault_t ret; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; pfn_t pfn; int error = 0; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; bool retry = false; if (write) sb_start_pagefault(sb); retry: if (retry && !(fcd->nr_free_ranges > 0)) wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); /* * We need to serialize against not only truncate but also against * fuse dax memory range reclaim. While a range is being reclaimed, * we do not want any read/write/mmap to make progress and try * to populate page cache or access memory we are trying to free. */ filemap_invalidate_lock_shared(inode->i_mapping); ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops); if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { error = 0; retry = true; filemap_invalidate_unlock_shared(inode->i_mapping); goto retry; } if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, order, pfn); filemap_invalidate_unlock_shared(inode->i_mapping); if (write) sb_end_pagefault(sb); return ret; } static vm_fault_t fuse_dax_fault(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order) { return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static const struct vm_operations_struct fuse_dax_vm_ops = { .fault = fuse_dax_fault, .huge_fault = fuse_dax_huge_fault, .page_mkwrite = fuse_dax_page_mkwrite, .pfn_mkwrite = fuse_dax_pfn_mkwrite, }; int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &fuse_dax_vm_ops; vm_flags_set(vma, VM_MIXEDMAP | VM_HUGEPAGE); return 0; } static int dmap_writeback_invalidate(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); if (ret) { pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", ret, start_pos, end_pos); return ret; } ret = invalidate_inode_pages2_range(inode->i_mapping, start_pos >> PAGE_SHIFT, end_pos >> PAGE_SHIFT); if (ret) pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", ret); return ret; } static int reclaim_one_dmap_locked(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); /* * igrab() was done to make sure inode won't go under us, and this * further avoids the race with evict(). */ ret = dmap_writeback_invalidate(inode, dmap); if (ret) return ret; /* Remove dax mapping from inode interval tree now */ interval_tree_remove(&dmap->itn, &fi->dax->tree); fi->dax->nr--; /* It is possible that umount/shutdown has killed the fuse connection * and worker thread is trying to reclaim memory in parallel. Don't * warn in that case. */ ret = dmap_removemapping_one(inode, dmap); if (ret && ret != -ENOTCONN) { pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", dmap->window_offset, dmap->length, ret); } return 0; } /* Find first mapped dmap for an inode and return file offset. Caller needs * to hold fi->dax->sem lock either shared or exclusive. */ static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; node = interval_tree_iter_next(node, 0, -1)) { dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) continue; return dmap; } return NULL; } /* * Find first mapping in the tree and free it and return it. Do not add * it back to free pool. */ static struct fuse_dax_mapping * inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, bool *retry) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; u64 dmap_start, dmap_end; unsigned long start_idx; int ret; struct interval_tree_node *node; filemap_invalidate_lock(inode->i_mapping); /* Lookup a dmap and corresponding file offset to reclaim. */ down_read(&fi->dax->sem); dmap = inode_lookup_first_dmap(inode); if (dmap) { start_idx = dmap->itn.start; dmap_start = start_idx << FUSE_DAX_SHIFT; dmap_end = dmap_start + FUSE_DAX_SZ - 1; } up_read(&fi->dax->sem); if (!dmap) goto out_mmap_sem; /* * Make sure there are no references to inode pages using * get_user_pages() */ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", ret); dmap = ERR_PTR(ret); goto out_mmap_sem; } down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got reclaimed by somebody else */ if (!node) { if (retry) *retry = true; goto out_write_dmap_sem; } dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) { dmap = NULL; if (retry) *retry = true; goto out_write_dmap_sem; } ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) { dmap = ERR_PTR(ret); goto out_write_dmap_sem; } /* Clean up dmap. Do not add back to free list */ dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", __func__, inode, dmap->window_offset, dmap->length); out_write_dmap_sem: up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return dmap; } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) { struct fuse_dax_mapping *dmap; struct fuse_inode *fi = get_fuse_inode(inode); while (1) { bool retry = false; dmap = alloc_dax_mapping(fcd); if (dmap) return dmap; dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); /* * Either we got a mapping or it is an error, return in both * the cases. */ if (dmap) return dmap; /* If we could not reclaim a mapping because it * had a reference or some other temporary failure, * Try again. We want to give up inline reclaim only * if there is no range assigned to this node. Otherwise * if a deadlock is possible if we sleep with * mapping->invalidate_lock held and worker to free memory * can't make progress due to unavailability of * mapping->invalidate_lock. So sleep only if fi->dax->nr=0 */ if (retry) continue; /* * There are no mappings which can be reclaimed. Wait for one. * We are not holding fi->dax->sem. So it is possible * that range gets added now. But as we are not holding * mapping->invalidate_lock, worker should still be able to * free up a range and wake us up. */ if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { if (wait_event_killable_exclusive(fcd->range_waitq, (fcd->nr_free_ranges > 0))) { return ERR_PTR(-EINTR); } } } } static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; /* Find fuse dax mapping at file offset inode. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got cleaned up by somebody else */ if (!node) return 0; dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) return 0; ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) return ret; /* Cleanup dmap entry and add back to free list */ spin_lock(&fcd->lock); dmap_reinit_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); return ret; } /* * Free a range of memory. * Locking: * 1. Take mapping->invalidate_lock to block dax faults. * 2. Take fi->dax->sem to protect interval tree and also to make sure * read/write can not reuse a dmap which we might be freeing. */ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx, unsigned long end_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; filemap_invalidate_lock(inode->i_mapping); ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n", ret); goto out_mmap_sem; } down_write(&fi->dax->sem); ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return ret; } static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, unsigned long nr_to_free) { struct fuse_dax_mapping *dmap, *pos, *temp; int ret, nr_freed = 0; unsigned long start_idx = 0, end_idx = 0; struct inode *inode = NULL; /* Pick first busy range and free it for now*/ while (1) { if (nr_freed >= nr_to_free) break; dmap = NULL; spin_lock(&fcd->lock); if (!fcd->nr_busy_ranges) { spin_unlock(&fcd->lock); return 0; } list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, busy_list) { /* skip this range if it's in use. */ if (refcount_read(&pos->refcnt) > 1) continue; inode = igrab(pos->inode); /* * This inode is going away. That will free * up all the ranges anyway, continue to * next range. */ if (!inode) continue; /* * Take this element off list and add it tail. If * this element can't be freed, it will help with * selecting new element in next iteration of loop. */ dmap = pos; list_move_tail(&dmap->busy_list, &fcd->busy_ranges); start_idx = end_idx = dmap->itn.start; break; } spin_unlock(&fcd->lock); if (!dmap) return 0; ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); iput(inode); if (ret) return ret; nr_freed++; } return 0; } static void fuse_dax_free_mem_worker(struct work_struct *work) { int ret; struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, free_work.work); ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); if (ret) { pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", ret); } /* If number of free ranges are still below threshold, requeue */ kick_dmap_free_worker(fcd, 1); } static void fuse_free_dax_mem_ranges(struct list_head *mem_list) { struct fuse_dax_mapping *range, *temp; /* Free All allocated elements */ list_for_each_entry_safe(range, temp, mem_list, list) { list_del(&range->list); if (!list_empty(&range->busy_list)) list_del(&range->busy_list); kfree(range); } } void fuse_dax_conn_free(struct fuse_conn *fc) { if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); fc->dax = NULL; } } static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; unsigned long i; init_waitqueue_head(&fcd->range_waitq); INIT_LIST_HEAD(&fcd->free_ranges); INIT_LIST_HEAD(&fcd->busy_ranges); INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), DAX_ACCESS, NULL, NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); return nr_pages; } nr_ranges = nr_pages/FUSE_DAX_PAGES; pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", __func__, nr_pages, nr_ranges); for (i = 0; i < nr_ranges; i++) { range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); ret = -ENOMEM; if (!range) goto out_err; /* TODO: This offset only works if virtio-fs driver is not * having some memory hidden at the beginning. This needs * better handling */ range->window_offset = i * FUSE_DAX_SZ; range->length = FUSE_DAX_SZ; INIT_LIST_HEAD(&range->busy_list); refcount_set(&range->refcnt, 1); list_add_tail(&range->list, &fcd->free_ranges); } fcd->nr_free_ranges = nr_ranges; fcd->nr_ranges = nr_ranges; return 0; out_err: /* Free All allocated elements */ fuse_free_dax_mem_ranges(&fcd->free_ranges); return ret; } int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode, struct dax_device *dax_dev) { struct fuse_conn_dax *fcd; int err; fc->dax_mode = dax_mode; if (!dax_dev) return 0; fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); if (!fcd) return -ENOMEM; spin_lock_init(&fcd->lock); fcd->dev = dax_dev; err = fuse_dax_mem_range_init(fcd); if (err) { kfree(fcd); return err; } fc->dax = fcd; return 0; } bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) { struct fuse_conn *fc = get_fuse_conn_super(sb); fi->dax = NULL; if (fc->dax) { fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); if (!fi->dax) return false; init_rwsem(&fi->dax->sem); fi->dax->tree = RB_ROOT_CACHED; } return true; } static const struct address_space_operations fuse_dax_file_aops = { .writepages = fuse_dax_writepages, .direct_IO = noop_direct_IO, .dirty_folio = noop_dirty_folio, }; static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); enum fuse_dax_mode dax_mode = fc->dax_mode; if (dax_mode == FUSE_DAX_NEVER) return false; /* * fc->dax may be NULL in 'inode' mode when filesystem device doesn't * support DAX, in which case it will silently fallback to 'never' mode. */ if (!fc->dax) return false; if (dax_mode == FUSE_DAX_ALWAYS) return true; /* dax_mode is FUSE_DAX_INODE* */ return fc->inode_dax && (flags & FUSE_ATTR_DAX); } void fuse_dax_inode_init(struct inode *inode, unsigned int flags) { if (!fuse_should_enable_dax(inode, flags)) return; inode->i_flags |= S_DAX; inode->i_data.a_ops = &fuse_dax_file_aops; } void fuse_dax_dontcache(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_is_inode_dax_mode(fc->dax_mode) && ((bool) IS_DAX(inode) != (bool) (flags & FUSE_ATTR_DAX))) d_mark_dontcache(inode); } bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) { if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", map_alignment, FUSE_DAX_SZ); return false; } return true; } void fuse_dax_cancel_work(struct fuse_conn *fc) { struct fuse_conn_dax *fcd = fc->dax; if (fcd) cancel_delayed_work_sync(&fcd->free_work); } EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
51 51 24 10 3 17 2 3 2 1 1 3 3 1 2 2 8 1 5 2 3 4 3 4 7 6 1 6 1 7 4 3 2 1 1 1 31 2 23 6 27 2 23 6 27 2 22 7 22 7 23 6 18 11 29 18 13 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs inode ops for the 9P2000.L protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" #include "xattr.h" #include "acl.h" static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev); /** * v9fs_get_fsgid_for_create - Helper function to get the gid for a new object * @dir_inode: The directory inode * * Helper function to get the gid for creating a * new file system object. This checks the S_ISGID to determine the owning * group of the new file system object. */ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) { BUG_ON(dir_inode == NULL); if (dir_inode->i_mode & S_ISGID) { /* set_gid bit is set.*/ return dir_inode->i_gid; } return current_fsgid(); } struct inode * v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new) { int retval; struct inode *inode; struct p9_stat_dotl *st; struct v9fs_session_info *v9ses = sb->s_fs_info; inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { if (!new) { goto done; } else { /* deal with race condition in inode number reuse */ p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n", inode->i_ino); iput(inode); remove_inode_hash(inode); inode = iget_locked(sb, QID2INO(&fid->qid)); WARN_ON(!(inode->i_state & I_NEW)); } } /* * initialize the inode with the stat info * FIXME!! we may need support for stale inodes * later. */ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN); if (IS_ERR(st)) { retval = PTR_ERR(st); goto error; } retval = v9fs_init_inode(v9ses, inode, &fid->qid, st->st_mode, new_decode_dev(st->st_rdev)); v9fs_stat2inode_dotl(st, inode, 0); kfree(st); if (retval) goto error; v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); retval = v9fs_get_acl(inode, fid); if (retval) goto error; unlock_new_inode(inode); done: return inode; error: iget_failed(inode); return ERR_PTR(retval); } struct dotl_openflag_map { int open_flag; int dotl_flag; }; static int v9fs_mapped_dotl_flags(int flags) { int i; int rflags = 0; struct dotl_openflag_map dotl_oflag_map[] = { { O_CREAT, P9_DOTL_CREATE }, { O_EXCL, P9_DOTL_EXCL }, { O_NOCTTY, P9_DOTL_NOCTTY }, { O_APPEND, P9_DOTL_APPEND }, { O_NONBLOCK, P9_DOTL_NONBLOCK }, { O_DSYNC, P9_DOTL_DSYNC }, { FASYNC, P9_DOTL_FASYNC }, { O_DIRECT, P9_DOTL_DIRECT }, { O_LARGEFILE, P9_DOTL_LARGEFILE }, { O_DIRECTORY, P9_DOTL_DIRECTORY }, { O_NOFOLLOW, P9_DOTL_NOFOLLOW }, { O_NOATIME, P9_DOTL_NOATIME }, { O_CLOEXEC, P9_DOTL_CLOEXEC }, { O_SYNC, P9_DOTL_SYNC}, }; for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { if (flags & dotl_oflag_map[i].open_flag) rflags |= dotl_oflag_map[i].dotl_flag; } return rflags; } /** * v9fs_open_to_dotl_flags- convert Linux specific open flags to * plan 9 open flag. * @flags: flags to convert */ int v9fs_open_to_dotl_flags(int flags) { int rflags = 0; /* * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY * and P9_DOTL_NOACCESS */ rflags |= flags & O_ACCMODE; rflags |= v9fs_mapped_dotl_flags(flags); return rflags; } /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @idmap: The user namespace of the mount * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @omode: create permissions * @excl: True if the file must not yet exist * */ static int v9fs_vfs_create_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, bool excl) { return v9fs_vfs_mknod_dotl(idmap, dir, dentry, omode, 0); } static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int flags, umode_t omode) { int err = 0; kgid_t gid; umode_t mode; int p9_omode = v9fs_open_to_dotl_flags(flags); const unsigned char *name = NULL; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; struct p9_fid *dfid = NULL, *ofid = NULL; struct v9fs_session_info *v9ses; struct posix_acl *pacl = NULL, *dacl = NULL; struct dentry *res = NULL; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); if (res) dentry = res; } /* Only creates */ if (!(flags & O_CREAT) || d_really_is_positive(dentry)) return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n", name, flags, omode); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto out; } /* clone a fid to use for creation */ ofid = clone_fid(dfid); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in create %d\n", err); goto out; } if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } err = p9_client_create_dotl(ofid, name, p9_omode, mode, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in create %d\n", err); goto out; } v9fs_invalidate_inode_attr(dir); /* instantiate inode and assign the unopened fid to the dentry */ fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto out; } /* Now set the ACL based on the default value */ v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); /* Since we are opening a file, assign the open fid to the file */ err = finish_open(file, dentry, generic_file_open); if (err) goto out; file->private_data = ofid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { struct v9fs_inode *v9inode = V9FS_I(inode); fscache_use_cookie(v9fs_inode_cookie(v9inode), file->f_mode & FMODE_WRITE); } #endif v9fs_fid_add_modes(ofid, v9ses->flags, v9ses->cache, flags); v9fs_open_fid_add(inode, &ofid); file->f_mode |= FMODE_CREATED; out: p9_fid_put(dfid); p9_fid_put(ofid); p9_fid_put(fid); v9fs_put_acl(dacl, pacl); dput(res); return err; } /** * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory * @idmap: The idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @omode: mode for new directory * */ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode) { int err; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; const unsigned char *name; umode_t mode; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); omode |= S_IFDIR; if (dir->i_mode & S_ISGID) omode |= S_ISGID; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } /* instantiate inode and assign the unopened fid to the dentry */ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_fid_add(dentry, &fid); v9fs_set_create_acl(inode, fid, dacl, pacl); d_instantiate(dentry, inode); err = 0; inc_nlink(dir); v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); return err; } static int v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct inode *inode = d_inode(dentry); struct p9_stat_dotl *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache) { if (S_ISREG(inode->i_mode)) { int retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval); } } fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); /* Ask for all the fields in stat structure. Server will return * whatever it supports */ st = p9_client_getattr_dotl(fid, P9_STATS_ALL); p9_fid_put(fid); if (IS_ERR(st)) return PTR_ERR(st); v9fs_stat2inode_dotl(st, d_inode(dentry), 0); generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); /* Change block size to what the server returned */ stat->blksize = st->st_blksize; kfree(st); return 0; } /* * Attribute flags. */ #define P9_ATTR_MODE (1 << 0) #define P9_ATTR_UID (1 << 1) #define P9_ATTR_GID (1 << 2) #define P9_ATTR_SIZE (1 << 3) #define P9_ATTR_ATIME (1 << 4) #define P9_ATTR_MTIME (1 << 5) #define P9_ATTR_CTIME (1 << 6) #define P9_ATTR_ATIME_SET (1 << 7) #define P9_ATTR_MTIME_SET (1 << 8) struct dotl_iattr_map { int iattr_valid; int p9_iattr_valid; }; static int v9fs_mapped_iattr_valid(int iattr_valid) { int i; int p9_iattr_valid = 0; struct dotl_iattr_map dotl_iattr_map[] = { { ATTR_MODE, P9_ATTR_MODE }, { ATTR_UID, P9_ATTR_UID }, { ATTR_GID, P9_ATTR_GID }, { ATTR_SIZE, P9_ATTR_SIZE }, { ATTR_ATIME, P9_ATTR_ATIME }, { ATTR_MTIME, P9_ATTR_MTIME }, { ATTR_CTIME, P9_ATTR_CTIME }, { ATTR_ATIME_SET, P9_ATTR_ATIME_SET }, { ATTR_MTIME_SET, P9_ATTR_MTIME_SET }, }; for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) { if (iattr_valid & dotl_iattr_map[i].iattr_valid) p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid; } return p9_iattr_valid; } /** * v9fs_vfs_setattr_dotl - set file metadata * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; struct inode *inode = d_inode(dentry); struct v9fs_session_info __maybe_unused *v9ses; struct p9_fid *fid = NULL; struct p9_iattr_dotl p9attr = { .uid = INVALID_UID, .gid = INVALID_GID, }; p9_debug(P9_DEBUG_VFS, "\n"); retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; v9ses = v9fs_dentry2v9ses(dentry); p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); if (iattr->ia_valid & ATTR_MODE) p9attr.mode = iattr->ia_mode; if (iattr->ia_valid & ATTR_UID) p9attr.uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) p9attr.gid = iattr->ia_gid; if (iattr->ia_valid & ATTR_SIZE) p9attr.size = iattr->ia_size; if (iattr->ia_valid & ATTR_ATIME_SET) { p9attr.atime_sec = iattr->ia_atime.tv_sec; p9attr.atime_nsec = iattr->ia_atime.tv_nsec; } if (iattr->ia_valid & ATTR_MTIME_SET) { p9attr.mtime_sec = iattr->ia_mtime.tv_sec; p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; } if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; WARN_ON(!fid); } if (!fid) { fid = v9fs_fid_lookup(dentry); use_dentry = 1; } if (IS_ERR(fid)) return PTR_ERR(fid); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { retval = filemap_fdatawrite(inode->i_mapping); if (retval < 0) p9_debug(P9_DEBUG_ERROR, "Flushing file prior to setattr failed: %d\n", retval); } retval = p9_client_setattr(fid, &p9attr); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) fscache_resize_cookie(v9fs_inode_cookie(V9FS_I(inode)), iattr->ia_size); #endif } v9fs_invalidate_inode_attr(inode); setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(inode, fid); if (retval < 0) { if (use_dentry) p9_fid_put(fid); return retval; } } if (use_dentry) p9_fid_put(fid); return 0; } /** * v9fs_stat2inode_dotl - populate an inode structure with stat info * @stat: stat structure * @inode: inode to populate * @flags: ctrl flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode, unsigned int flags) { umode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; set_nlink(inode, stat->st_nlink); mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; v9inode->netfs.remote_i_size = stat->st_size; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; } else { if (stat->st_result_mask & P9_STATS_ATIME) { inode_set_atime(inode, stat->st_atime_sec, stat->st_atime_nsec); } if (stat->st_result_mask & P9_STATS_MTIME) { inode_set_mtime(inode, stat->st_mtime_sec, stat->st_mtime_nsec); } if (stat->st_result_mask & P9_STATS_CTIME) { inode_set_ctime(inode, stat->st_ctime_sec, stat->st_ctime_nsec); } if (stat->st_result_mask & P9_STATS_UID) inode->i_uid = stat->st_uid; if (stat->st_result_mask & P9_STATS_GID) inode->i_gid = stat->st_gid; if (stat->st_result_mask & P9_STATS_NLINK) set_nlink(inode, stat->st_nlink); if (stat->st_result_mask & P9_STATS_MODE) { mode = stat->st_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) && stat->st_result_mask & P9_STATS_SIZE) { v9inode->netfs.remote_i_size = stat->st_size; v9fs_i_size_write(inode, stat->st_size); } if (stat->st_result_mask & P9_STATS_BLOCKS) inode->i_blocks = stat->st_blocks; } if (stat->st_result_mask & P9_STATS_GEN) inode->i_generation = stat->st_gen; /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION * because the inode structure does not have fields for them. */ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } static int v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int err; kgid_t gid; const unsigned char *name; struct p9_qid qid; struct p9_fid *dfid; struct p9_fid *fid = NULL; name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return err; } gid = v9fs_get_fsgid_for_create(dir); /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ err = p9_client_symlink(dfid, name, symname, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); goto error; } v9fs_invalidate_inode_attr(dir); error: p9_fid_put(fid); p9_fid_put(dfid); return err; } /** * v9fs_vfs_link_dotl - create a hardlink for dotl * @old_dentry: dentry for file to link to * @dir: inode destination for new link * @dentry: dentry for link * */ static int v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int err; struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n", dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); oldfid = v9fs_fid_lookup(old_dentry); if (IS_ERR(oldfid)) { p9_fid_put(dfid); return PTR_ERR(oldfid); } err = p9_client_link(dfid, oldfid, dentry->d_name.name); p9_fid_put(dfid); p9_fid_put(oldfid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); return err; } v9fs_invalidate_inode_attr(dir); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { /* Get the latest stat info from server. */ struct p9_fid *fid; fid = v9fs_fid_lookup(old_dentry); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_refresh_inode_dotl(fid, d_inode(old_dentry)); p9_fid_put(fid); } ihold(d_inode(old_dentry)); d_instantiate(dentry, d_inode(old_dentry)); return err; } /** * v9fs_vfs_mknod_dotl - create a special file * @idmap: The idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @omode: mode for creation * @rdev: device associated with special file * */ static int v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode, dev_t rdev) { int err; kgid_t gid; const unsigned char *name; umode_t mode; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); goto error; } gid = v9fs_get_fsgid_for_create(dir); mode = omode; /* Update mode based on ACL value */ err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); if (err) { p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n", err); goto error; } name = dentry->d_name.name; err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); if (err < 0) goto error; v9fs_invalidate_inode_attr(dir); fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_set_create_acl(inode, fid, dacl, pacl); v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); err = 0; error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); return err; } /** * v9fs_vfs_get_link_dotl - follow a symlink path * @dentry: dentry for symlink * @inode: inode for symlink * @done: destructor for return value */ static const char * v9fs_vfs_get_link_dotl(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct p9_fid *fid; char *target; int retval; if (!dentry) return ERR_PTR(-ECHILD); p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); p9_fid_put(fid); if (retval) return ERR_PTR(retval); set_delayed_call(done, kfree_link, target); return target; } int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { struct p9_stat_dotl *st; struct v9fs_session_info *v9ses; unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_getattr_dotl(fid, P9_STATS_ALL); if (IS_ERR(st)) return PTR_ERR(st); /* * Don't update inode if the file type is different */ if (inode_wrong_type(inode, st->st_mode)) goto out; /* * We don't want to refresh inode->i_size, * because we may have cached data */ flags = (v9ses->cache & CACHE_LOOSE) ? V9FS_STAT2INODE_KEEP_ISIZE : 0; v9fs_stat2inode_dotl(st, inode, flags); out: kfree(st); return 0; } const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create_dotl, .atomic_open = v9fs_vfs_atomic_open_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir_dotl, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod_dotl, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_file_inode_operations_dotl = { .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, .get_inode_acl = v9fs_iop_get_inode_acl, .get_acl = v9fs_iop_get_acl, .set_acl = v9fs_iop_set_acl, }; const struct inode_operations v9fs_symlink_inode_operations_dotl = { .get_link = v9fs_vfs_get_link_dotl, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .listxattr = v9fs_listxattr, };
5 5 5 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0-only /* * ebtable_broute * * Authors: * Bart De Schuymer <bdschuym@pandora.be> * * April, 2002 * * This table lets you choose between routing and bridging for frames * entering on a bridge enslaved nic. This table is traversed before any * other ebtables table. See net/bridge/br_input.c. */ #include <linux/netfilter_bridge/ebtables.h> #include <linux/module.h> #include <linux/if_bridge.h> #include "../br_private.h" /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed */ static struct ebt_entries initial_chain = { .name = "BROUTING", .policy = EBT_ACCEPT, }; static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, .entries_size = sizeof(struct ebt_entries), .hook_entry = { [NF_BR_BROUTING] = &initial_chain, }, .entries = (char *)&initial_chain, }; static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, .me = THIS_MODULE, }; static unsigned int ebt_broute(void *priv, struct sk_buff *skb, const struct nf_hook_state *s) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; unsigned char *dest; int ret; if (!p || p->state != BR_STATE_FORWARDING) return NF_ACCEPT; nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL); ret = ebt_do_table(priv, skb, &state); if (ret != NF_DROP) return ret; /* DROP in ebtables -t broute means that the * skb should be routed, not bridged. * This is awkward, but can't be changed for compatibility * reasons. * * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. */ BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; /* undo PACKET_HOST mangling done in br_input in case the dst * address matches the logical bridge but not the port. */ dest = eth_hdr(skb)->h_dest; if (skb->pkt_type == PACKET_HOST && !ether_addr_equal(skb->dev->dev_addr, dest) && ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_OTHERHOST; return NF_ACCEPT; } static const struct nf_hook_ops ebt_ops_broute = { .hook = ebt_broute, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_FIRST, }; static int broute_table_init(struct net *net) { return ebt_register_table(net, &broute_table, &ebt_ops_broute); } static void __net_exit broute_net_pre_exit(struct net *net) { ebt_unregister_table_pre_exit(net, "broute"); } static void __net_exit broute_net_exit(struct net *net) { ebt_unregister_table(net, "broute"); } static struct pernet_operations broute_net_ops = { .exit = broute_net_exit, .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) { int ret = ebt_register_template(&broute_table, broute_table_init); if (ret) return ret; ret = register_pernet_subsys(&broute_net_ops); if (ret) { ebt_unregister_template(&broute_table); return ret; } return 0; } static void __exit ebtable_broute_fini(void) { unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); } module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Force packets to be routed instead of bridged");
120 34 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BTRFS_FS_H #define BTRFS_FS_H #include <linux/blkdev.h> #include <linux/sizes.h> #include <linux/time64.h> #include <linux/compiler.h> #include <linux/math.h> #include <linux/atomic.h> #include <linux/percpu_counter.h> #include <linux/completion.h> #include <linux/lockdep.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/rwlock_types.h> #include <linux/rwsem.h> #include <linux/semaphore.h> #include <linux/list.h> #include <linux/radix-tree.h> #include <linux/workqueue.h> #include <linux/wait.h> #include <linux/wait_bit.h> #include <linux/sched.h> #include <linux/rbtree.h> #include <uapi/linux/btrfs.h> #include <uapi/linux/btrfs_tree.h> #include "extent-io-tree.h" #include "async-thread.h" #include "block-rsv.h" struct inode; struct super_block; struct kobject; struct reloc_control; struct crypto_shash; struct ulist; struct btrfs_device; struct btrfs_block_group; struct btrfs_root; struct btrfs_fs_devices; struct btrfs_transaction; struct btrfs_delayed_root; struct btrfs_balance_control; struct btrfs_subpage_info; struct btrfs_stripe_hash_table; struct btrfs_space_info; #define BTRFS_MAX_EXTENT_SIZE SZ_128M #define BTRFS_OLDEST_GENERATION 0ULL #define BTRFS_EMPTY_DIR_SIZE 0 #define BTRFS_DIRTY_METADATA_THRESH SZ_32M #define BTRFS_SUPER_INFO_OFFSET SZ_64K #define BTRFS_SUPER_INFO_SIZE 4096 static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); /* * Number of metadata items necessary for an unlink operation: * * 1 for the possible orphan item * 1 for the dir item * 1 for the dir index * 1 for the inode ref * 1 for the inode * 1 for the parent inode */ #define BTRFS_UNLINK_METADATA_UNITS 6 /* * The reserved space at the beginning of each device. It covers the primary * super block and leaves space for potential use by other tools like * bootloaders or to lower potential damage of accidental overwrite. */ #define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M) /* * Runtime (in-memory) states of filesystem */ enum { /* * Filesystem is being remounted, allow to skip some operations, like * defrag */ BTRFS_FS_STATE_REMOUNTING, /* Filesystem in RO mode */ BTRFS_FS_STATE_RO, /* Track if a transaction abort has been reported on this filesystem */ BTRFS_FS_STATE_TRANS_ABORTED, /* * Bio operations should be blocked on this filesystem because a source * or target device is being destroyed as part of a device replace */ BTRFS_FS_STATE_DEV_REPLACING, /* The btrfs_fs_info created for self-tests */ BTRFS_FS_STATE_DUMMY_FS_INFO, /* Checksum errors are ignored. */ BTRFS_FS_STATE_NO_DATA_CSUMS, BTRFS_FS_STATE_SKIP_META_CSUMS, /* Indicates there was an error cleaning up a log tree. */ BTRFS_FS_STATE_LOG_CLEANUP_ERROR, BTRFS_FS_STATE_COUNT }; enum { BTRFS_FS_CLOSING_START, BTRFS_FS_CLOSING_DONE, BTRFS_FS_LOG_RECOVERING, BTRFS_FS_OPEN, BTRFS_FS_QUOTA_ENABLED, BTRFS_FS_UPDATE_UUID_TREE_GEN, BTRFS_FS_CREATING_FREE_SPACE_TREE, BTRFS_FS_BTREE_ERR, BTRFS_FS_LOG1_ERR, BTRFS_FS_LOG2_ERR, BTRFS_FS_QUOTA_OVERRIDE, /* Used to record internally whether fs has been frozen */ BTRFS_FS_FROZEN, /* * Indicate that balance has been set up from the ioctl and is in the * main phase. The fs_info::balance_ctl is initialized. */ BTRFS_FS_BALANCE_RUNNING, /* * Indicate that relocation of a chunk has started, it's set per chunk * and is toggled between chunks. */ BTRFS_FS_RELOC_RUNNING, /* Indicate that the cleaner thread is awake and doing something. */ BTRFS_FS_CLEANER_RUNNING, /* * The checksumming has an optimized version and is considered fast, * so we don't need to offload checksums to workqueues. */ BTRFS_FS_CSUM_IMPL_FAST, /* Indicate that the discard workqueue can service discards. */ BTRFS_FS_DISCARD_RUNNING, /* Indicate that we need to cleanup space cache v1 */ BTRFS_FS_CLEANUP_SPACE_CACHE_V1, /* Indicate that we can't trust the free space tree for caching yet */ BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, /* Indicate whether there are any tree modification log users */ BTRFS_FS_TREE_MOD_LOG_USERS, /* Indicate that we want the transaction kthread to commit right now. */ BTRFS_FS_COMMIT_TRANS, /* Indicate we have half completed snapshot deletions pending. */ BTRFS_FS_UNFINISHED_DROPS, /* Indicate we have to finish a zone to do next allocation. */ BTRFS_FS_NEED_ZONE_FINISH, /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, /* This is set when active zone tracking is needed. */ BTRFS_FS_ACTIVE_ZONE_TRACKING, /* * Indicate if we have some features changed, this is mostly for * cleaner thread to update the sysfs interface. */ BTRFS_FS_FEATURE_CHANGED, /* * Indicate that we have found a tree block which is only aligned to * sectorsize, but not to nodesize. This should be rare nowadays. */ BTRFS_FS_UNALIGNED_TREE_BLOCK, #if BITS_PER_LONG == 32 /* Indicate if we have error/warn message printed on 32bit systems */ BTRFS_FS_32BIT_ERROR, BTRFS_FS_32BIT_WARN, #endif }; /* * Flags for mount options. * * Note: don't forget to add new options to btrfs_show_options() */ enum { BTRFS_MOUNT_NODATASUM = (1ULL << 0), BTRFS_MOUNT_NODATACOW = (1ULL << 1), BTRFS_MOUNT_NOBARRIER = (1ULL << 2), BTRFS_MOUNT_SSD = (1ULL << 3), BTRFS_MOUNT_DEGRADED = (1ULL << 4), BTRFS_MOUNT_COMPRESS = (1ULL << 5), BTRFS_MOUNT_NOTREELOG = (1ULL << 6), BTRFS_MOUNT_FLUSHONCOMMIT = (1ULL << 7), BTRFS_MOUNT_SSD_SPREAD = (1ULL << 8), BTRFS_MOUNT_NOSSD = (1ULL << 9), BTRFS_MOUNT_DISCARD_SYNC = (1ULL << 10), BTRFS_MOUNT_FORCE_COMPRESS = (1ULL << 11), BTRFS_MOUNT_SPACE_CACHE = (1ULL << 12), BTRFS_MOUNT_CLEAR_CACHE = (1ULL << 13), BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED = (1ULL << 14), BTRFS_MOUNT_ENOSPC_DEBUG = (1ULL << 15), BTRFS_MOUNT_AUTO_DEFRAG = (1ULL << 16), BTRFS_MOUNT_USEBACKUPROOT = (1ULL << 17), BTRFS_MOUNT_SKIP_BALANCE = (1ULL << 18), BTRFS_MOUNT_PANIC_ON_FATAL_ERROR = (1ULL << 19), BTRFS_MOUNT_RESCAN_UUID_TREE = (1ULL << 20), BTRFS_MOUNT_FRAGMENT_DATA = (1ULL << 21), BTRFS_MOUNT_FRAGMENT_METADATA = (1ULL << 22), BTRFS_MOUNT_FREE_SPACE_TREE = (1ULL << 23), BTRFS_MOUNT_NOLOGREPLAY = (1ULL << 24), BTRFS_MOUNT_REF_VERIFY = (1ULL << 25), BTRFS_MOUNT_DISCARD_ASYNC = (1ULL << 26), BTRFS_MOUNT_IGNOREBADROOTS = (1ULL << 27), BTRFS_MOUNT_IGNOREDATACSUMS = (1ULL << 28), BTRFS_MOUNT_NODISCARD = (1ULL << 29), BTRFS_MOUNT_NOSPACECACHE = (1ULL << 30), BTRFS_MOUNT_IGNOREMETACSUMS = (1ULL << 31), BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32), }; /* * Compat flags that we support. If any incompat flags are set other than the * ones specified below then we will fail to mount */ #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP \ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \ BTRFS_FEATURE_COMPAT_RO_VERITY | \ BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE) #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL #define BTRFS_FEATURE_INCOMPAT_SUPP_STABLE \ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ BTRFS_FEATURE_INCOMPAT_RAID56 | \ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \ BTRFS_FEATURE_INCOMPAT_RAID1C34 | \ BTRFS_FEATURE_INCOMPAT_ZONED | \ BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA) #ifdef CONFIG_BTRFS_DEBUG /* * Features under developmen like Extent tree v2 support is enabled * only under CONFIG_BTRFS_DEBUG. */ #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_SUPP_STABLE | \ BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \ BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) #else #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_SUPP_STABLE) #endif #define BTRFS_FEATURE_INCOMPAT_SAFE_SET \ (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) #define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define BTRFS_DEFAULT_MAX_INLINE (2048) struct btrfs_dev_replace { /* See #define above */ u64 replace_state; /* Seconds since 1-Jan-1970 */ time64_t time_started; /* Seconds since 1-Jan-1970 */ time64_t time_stopped; atomic64_t num_write_errors; atomic64_t num_uncorrectable_read_errors; u64 cursor_left; u64 committed_cursor_left; u64 cursor_left_last_write_of_item; u64 cursor_right; /* See #define above */ u64 cont_reading_from_srcdev_mode; int is_valid; int item_needs_writeback; struct btrfs_device *srcdev; struct btrfs_device *tgtdev; struct mutex lock_finishing_cancel_unmount; struct rw_semaphore rwsem; struct btrfs_scrub_progress scrub_progress; struct percpu_counter bio_counter; wait_queue_head_t replace_wait; }; /* * Free clusters are used to claim free space in relatively large chunks, * allowing us to do less seeky writes. They are used for all metadata * allocations. In ssd_spread mode they are also used for data allocations. */ struct btrfs_free_cluster { spinlock_t lock; spinlock_t refill_lock; struct rb_root root; /* Largest extent in this cluster */ u64 max_size; /* First extent starting offset */ u64 window_start; /* We did a full search and couldn't create a cluster */ bool fragmented; struct btrfs_block_group *block_group; /* * When a cluster is allocated from a block group, we put the cluster * onto a list in the block group so that it can be freed before the * block group is freed. */ struct list_head block_group_list; }; /* Discard control. */ /* * Async discard uses multiple lists to differentiate the discard filter * parameters. Index 0 is for completely free block groups where we need to * ensure the entire block group is trimmed without being lossy. Indices * afterwards represent monotonically decreasing discard filter sizes to * prioritize what should be discarded next. */ #define BTRFS_NR_DISCARD_LISTS 3 #define BTRFS_DISCARD_INDEX_UNUSED 0 #define BTRFS_DISCARD_INDEX_START 1 struct btrfs_discard_ctl { struct workqueue_struct *discard_workers; struct delayed_work work; spinlock_t lock; struct btrfs_block_group *block_group; struct list_head discard_list[BTRFS_NR_DISCARD_LISTS]; u64 prev_discard; u64 prev_discard_time; atomic_t discardable_extents; atomic64_t discardable_bytes; u64 max_discard_size; u64 delay_ms; u32 iops_limit; u32 kbps_limit; u64 discard_extent_bytes; u64 discard_bitmap_bytes; atomic64_t discard_bytes_saved; }; /* * Exclusive operations (device replace, resize, device add/remove, balance) */ enum btrfs_exclusive_operation { BTRFS_EXCLOP_NONE, BTRFS_EXCLOP_BALANCE_PAUSED, BTRFS_EXCLOP_BALANCE, BTRFS_EXCLOP_DEV_ADD, BTRFS_EXCLOP_DEV_REMOVE, BTRFS_EXCLOP_DEV_REPLACE, BTRFS_EXCLOP_RESIZE, BTRFS_EXCLOP_SWAP_ACTIVATE, }; /* Store data about transaction commits, exported via sysfs. */ struct btrfs_commit_stats { /* Total number of commits */ u64 commit_count; /* The maximum commit duration so far in ns */ u64 max_commit_dur; /* The last commit duration in ns */ u64 last_commit_dur; /* The total commit duration in ns */ u64 total_commit_dur; }; struct btrfs_fs_info { u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; unsigned long flags; struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; struct btrfs_root *fs_root; struct btrfs_root *quota_root; struct btrfs_root *uuid_root; struct btrfs_root *data_reloc_root; struct btrfs_root *block_group_root; struct btrfs_root *stripe_root; /* The log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; /* The tree that holds the global roots (csum, extent, etc) */ rwlock_t global_root_lock; struct rb_root global_root_tree; spinlock_t fs_roots_radix_lock; struct radix_tree_root fs_roots_radix; /* Block group cache stuff */ rwlock_t block_group_cache_lock; struct rb_root_cached block_group_cache_tree; /* Keep track of unallocated space */ atomic64_t free_chunk_space; /* Track ranges which are used by log trees blocks/logged data extents */ struct extent_io_tree excluded_extents; /* logical->physical extent mapping */ struct rb_root_cached mapping_tree; rwlock_t mapping_tree_lock; /* * Block reservation for extent, checksum, root tree and delayed dir * index item. */ struct btrfs_block_rsv global_block_rsv; /* Block reservation for metadata operations */ struct btrfs_block_rsv trans_block_rsv; /* Block reservation for chunk tree */ struct btrfs_block_rsv chunk_block_rsv; /* Block reservation for delayed operations */ struct btrfs_block_rsv delayed_block_rsv; /* Block reservation for delayed refs */ struct btrfs_block_rsv delayed_refs_rsv; struct btrfs_block_rsv empty_block_rsv; /* * Updated while holding the lock 'trans_lock'. Due to the life cycle of * a transaction, it can be directly read while holding a transaction * handle, everywhere else must be read with btrfs_get_fs_generation(). * Should always be updated using btrfs_set_fs_generation(). */ u64 generation; /* * Always use btrfs_get_last_trans_committed() and * btrfs_set_last_trans_committed() to read and update this field. */ u64 last_trans_committed; /* * Generation of the last transaction used for block group relocation * since the filesystem was last mounted (or 0 if none happened yet). * Must be written and read while holding btrfs_fs_info::commit_root_sem. */ u64 last_reloc_trans; /* * This is updated to the current trans every time a full commit is * required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; unsigned long long mount_opt; unsigned long compress_type:4; unsigned int compress_level; u32 commit_interval; /* * It is a suggestive number, the read side is safe even it gets a * wrong number because we will write out the data into a regular * extent. The write side(mount/remount) is under ->s_umount lock, * so it is also safe. */ u64 max_inline; struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; wait_queue_head_t transaction_blocked_wait; wait_queue_head_t async_submit_wait; /* * Used to protect the incompat_flags, compat_flags, compat_ro_flags * when they are updated. * * Because we do not clear the flags for ever, so we needn't use * the lock on the read side. * * We also needn't use the lock when we mount the fs, because * there is no other task which will update the flag. */ spinlock_t super_lock; struct btrfs_super_block *super_copy; struct btrfs_super_block *super_for_commit; struct super_block *sb; struct inode *btree_inode; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex chunk_mutex; /* * This is taken to make sure we don't set block groups ro after the * free space cache has been allocated on them. */ struct mutex ro_block_group_mutex; /* * This is used during read/modify/write to make sure no two ios are * trying to mod the same stripe at the same time. */ struct btrfs_stripe_hash_table *stripe_hash_table; /* * This protects the ordered operations list only while we are * processing all of the entries on it. This way we make sure the * commit code doesn't find the list temporarily empty because another * function happens to be doing non-waiting preflush before jumping * into the main commit. */ struct mutex ordered_operations_mutex; struct rw_semaphore commit_root_sem; struct rw_semaphore cleanup_work_sem; struct rw_semaphore subvol_sem; spinlock_t trans_lock; /* * The reloc mutex goes with the trans lock, it is taken during commit * to protect us from the relocation code. */ struct mutex reloc_mutex; struct list_head trans_list; struct list_head dead_roots; struct list_head caching_block_groups; spinlock_t delayed_iput_lock; struct list_head delayed_iputs; atomic_t nr_delayed_iputs; wait_queue_head_t delayed_iputs_wait; atomic64_t tree_mod_seq; /* This protects tree_mod_log and tree_mod_seq_list */ rwlock_t tree_mod_log_lock; struct rb_root tree_mod_log; struct list_head tree_mod_seq_list; atomic_t async_delalloc_pages; /* This is used to protect the following list -- ordered_roots. */ spinlock_t ordered_root_lock; /* * All fs/file tree roots in which there are data=ordered extents * pending writeback are added into this list. * * These can span multiple transactions and basically include every * dirty data page that isn't from nodatacow. */ struct list_head ordered_roots; struct mutex delalloc_root_mutex; spinlock_t delalloc_root_lock; /* All fs/file tree roots that have delalloc inodes. */ struct list_head delalloc_roots; /* * There is a pool of worker threads for checksumming during writes and * a pool for checksumming after reads. This is because readers can * run with FS locks held, and the writers may be waiting for those * locks. We don't want ordering in the pending list to cause * deadlocks, and so the two are serviced separately. * * A third pool does submit_bio to avoid deadlocking with the other two. */ struct btrfs_workqueue *workers; struct btrfs_workqueue *delalloc_workers; struct btrfs_workqueue *flush_workers; struct workqueue_struct *endio_workers; struct workqueue_struct *endio_meta_workers; struct workqueue_struct *rmw_workers; struct workqueue_struct *compressed_write_workers; struct btrfs_workqueue *endio_write_workers; struct btrfs_workqueue *endio_freespace_worker; struct btrfs_workqueue *caching_workers; /* * Fixup workers take dirty pages that didn't properly go through the * cow mechanism and make them safe to write. It happens for the * sys_munmap function call path. */ struct btrfs_workqueue *fixup_workers; struct btrfs_workqueue *delayed_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; u32 thread_pool_size; struct kobject *space_info_kobj; struct kobject *qgroups_kobj; struct kobject *discard_kobj; /* Used to keep from writing metadata until there is a nice batch */ struct percpu_counter dirty_metadata_bytes; struct percpu_counter delalloc_bytes; struct percpu_counter ordered_bytes; s32 dirty_metadata_batch; s32 delalloc_batch; struct percpu_counter evictable_extent_maps; spinlock_t extent_map_shrinker_lock; u64 extent_map_shrinker_last_root; u64 extent_map_shrinker_last_ino; /* Protected by 'trans_lock'. */ struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; /* * The space_info list is effectively read only after initial setup. * It is populated at mount time and cleaned up after all block groups * are removed. RCU is used to protect it. */ struct list_head space_info; struct btrfs_space_info *data_sinfo; struct reloc_control *reloc_ctl; /* data_alloc_cluster is only used in ssd_spread mode */ struct btrfs_free_cluster data_alloc_cluster; /* All metadata allocations go through this cluster. */ struct btrfs_free_cluster meta_alloc_cluster; /* Auto defrag inodes go here. */ spinlock_t defrag_inodes_lock; struct rb_root defrag_inodes; atomic_t defrag_running; /* Used to protect avail_{data, metadata, system}_alloc_bits */ seqlock_t profiles_lock; /* * These three are in extended format (availability of single chunks is * denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted * by corresponding BTRFS_BLOCK_GROUP_* bits) */ u64 avail_data_alloc_bits; u64 avail_metadata_alloc_bits; u64 avail_system_alloc_bits; /* Balance state */ spinlock_t balance_lock; struct mutex balance_mutex; atomic_t balance_pause_req; atomic_t balance_cancel_req; struct btrfs_balance_control *balance_ctl; wait_queue_head_t balance_wait_q; /* Cancellation requests for chunk relocation */ atomic_t reloc_cancel_req; u32 data_chunk_allocations; u32 metadata_ratio; void *bdev_holder; /* Private scrub information */ struct mutex scrub_lock; atomic_t scrubs_running; atomic_t scrub_pause_req; atomic_t scrubs_paused; atomic_t scrub_cancel_req; wait_queue_head_t scrub_pause_wait; /* * The worker pointers are NULL iff the refcount is 0, ie. scrub is not * running. */ refcount_t scrub_workers_refcnt; struct workqueue_struct *scrub_workers; struct btrfs_subpage_info *subpage_info; struct btrfs_discard_ctl discard_ctl; /* Is qgroup tracking in a consistent state? */ u64 qgroup_flags; /* Holds configuration and tracking. Protected by qgroup_lock. */ struct rb_root qgroup_tree; spinlock_t qgroup_lock; /* * Used to avoid frequently calling ulist_alloc()/ulist_free() * when doing qgroup accounting, it must be protected by qgroup_lock. */ struct ulist *qgroup_ulist; /* * Protect user change for quota operations. If a transaction is needed, * it must be started before locking this lock. */ struct mutex qgroup_ioctl_lock; /* List of dirty qgroups to be written at next commit. */ struct list_head dirty_qgroups; /* Used by qgroup for an efficient tree traversal. */ u64 qgroup_seq; /* Qgroup rescan items. */ /* Protects the progress item */ struct mutex qgroup_rescan_lock; struct btrfs_key qgroup_rescan_progress; struct btrfs_workqueue *qgroup_rescan_workers; struct completion qgroup_rescan_completion; struct btrfs_work qgroup_rescan_work; /* Protected by qgroup_rescan_lock */ bool qgroup_rescan_running; u8 qgroup_drop_subtree_thres; u64 qgroup_enable_gen; /* * If this is not 0, then it indicates a serious filesystem error has * happened and it contains that error (negative errno value). */ int fs_error; /* Filesystem state */ unsigned long fs_state; struct btrfs_delayed_root *delayed_root; /* Extent buffer radix tree */ spinlock_t buffer_lock; /* Entries are eb->start / sectorsize */ struct radix_tree_root buffer_radix; /* Next backup root to be overwritten */ int backup_root_index; /* Device replace state */ struct btrfs_dev_replace dev_replace; struct semaphore uuid_tree_rescan_sem; /* Used to reclaim the metadata space in the background. */ struct work_struct async_reclaim_work; struct work_struct async_data_reclaim_work; struct work_struct preempt_reclaim_work; /* Reclaim partially filled block groups in the background */ struct work_struct reclaim_bgs_work; /* Protected by unused_bgs_lock. */ struct list_head reclaim_bgs; int bg_reclaim_threshold; /* Protects the lists unused_bgs and reclaim_bgs. */ spinlock_t unused_bgs_lock; /* Protected by unused_bgs_lock. */ struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; /* Protect block groups that are going to be deleted */ struct mutex reclaim_bgs_lock; /* Cached block sizes */ u32 nodesize; u32 sectorsize; /* ilog2 of sectorsize, use to avoid 64bit division */ u32 sectorsize_bits; u32 csum_size; u32 csums_per_leaf; u32 stripesize; /* * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular * filesystem, on zoned it depends on the device constraints. */ u64 max_extent_size; /* Block groups and devices containing active swapfiles. */ spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; struct crypto_shash *csum_shash; /* Type of exclusive operation running, protected by super_lock */ enum btrfs_exclusive_operation exclusive_operation; /* * Zone size > 0 when in ZONED mode, otherwise it's used for a check * if the mode is enabled */ u64 zone_size; /* Constraints for ZONE_APPEND commands: */ struct queue_limits limits; u64 max_zone_append_size; struct mutex zoned_meta_io_lock; spinlock_t treelog_bg_lock; u64 treelog_bg; /* * Start of the dedicated data relocation block group, protected by * relocation_bg_lock. */ spinlock_t relocation_bg_lock; u64 data_reloc_bg; struct mutex zoned_data_reloc_io_lock; struct btrfs_block_group *active_meta_bg; struct btrfs_block_group *active_system_bg; u64 nr_global_roots; spinlock_t zone_active_bgs_lock; struct list_head zone_active_bgs; /* Updates are not protected by any lock */ struct btrfs_commit_stats commit_stats; /* * Last generation where we dropped a non-relocation root. * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen() * to change it and to read it, respectively. */ u64 last_root_drop_gen; /* * Annotations for transaction events (structures are empty when * compiled without lockdep). */ struct lockdep_map btrfs_trans_num_writers_map; struct lockdep_map btrfs_trans_num_extwriters_map; struct lockdep_map btrfs_state_change_map[4]; struct lockdep_map btrfs_trans_pending_ordered_map; struct lockdep_map btrfs_ordered_extent_map; #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; #endif #ifdef CONFIG_BTRFS_DEBUG struct kobject *debug_kobj; struct list_head allocated_roots; spinlock_t eb_leak_lock; struct list_head allocated_ebs; #endif }; #define page_to_inode(_page) (BTRFS_I(_Generic((_page), \ struct page *: (_page))->mapping->host)) #define folio_to_inode(_folio) (BTRFS_I(_Generic((_folio), \ struct folio *: (_folio))->mapping->host)) #define page_to_fs_info(_page) (page_to_inode(_page)->root->fs_info) #define folio_to_fs_info(_folio) (folio_to_inode(_folio)->root->fs_info) #define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \ struct inode *: (_inode)))->root->fs_info) static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info) { return READ_ONCE(fs_info->generation); } static inline void btrfs_set_fs_generation(struct btrfs_fs_info *fs_info, u64 gen) { WRITE_ONCE(fs_info->generation, gen); } static inline u64 btrfs_get_last_trans_committed(const struct btrfs_fs_info *fs_info) { return READ_ONCE(fs_info->last_trans_committed); } static inline void btrfs_set_last_trans_committed(struct btrfs_fs_info *fs_info, u64 gen) { WRITE_ONCE(fs_info->last_trans_committed, gen); } static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info, u64 gen) { WRITE_ONCE(fs_info->last_root_drop_gen, gen); } static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info) { return READ_ONCE(fs_info->last_root_drop_gen); } /* * Take the number of bytes to be checksummed and figure out how many leaves * it would require to store the csums for that many bytes. */ static inline u64 btrfs_csum_bytes_to_leaves( const struct btrfs_fs_info *fs_info, u64 csum_bytes) { const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits; return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf); } /* * Use this if we would be adding new items, as we could split nodes as we cow * down the tree. */ static inline u64 btrfs_calc_insert_metadata_size(const struct btrfs_fs_info *fs_info, unsigned num_items) { return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; } /* * Doing a truncate or a modification won't result in new nodes or leaves, just * what we need for COW. */ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info, unsigned num_items) { return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; } #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ sizeof(struct btrfs_item)) static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) { return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0; } /* * Count how many fs_info->max_extent_size cover the @size */ static inline u32 count_max_extents(const struct btrfs_fs_info *fs_info, u64 size) { #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (!fs_info) return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); #endif return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); } bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, enum btrfs_exclusive_operation type); bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, enum btrfs_exclusive_operation type); void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info); void btrfs_exclop_finish(struct btrfs_fs_info *fs_info); void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, enum btrfs_exclusive_operation op); int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args); /* Compatibility and incompatibility defines */ void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name); void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name); void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, const char *name); void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, const char *name); #define __btrfs_fs_incompat(fs_info, flags) \ (!!(btrfs_super_incompat_flags((fs_info)->super_copy) & (flags))) #define __btrfs_fs_compat_ro(fs_info, flags) \ (!!(btrfs_super_compat_ro_flags((fs_info)->super_copy) & (flags))) #define btrfs_set_fs_incompat(__fs_info, opt) \ __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) #define btrfs_clear_fs_incompat(__fs_info, opt) \ __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) #define btrfs_fs_incompat(fs_info, opt) \ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) #define btrfs_set_fs_compat_ro(__fs_info, opt) \ __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) #define btrfs_clear_fs_compat_ro(__fs_info, opt) \ __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) #define btrfs_fs_compat_ro(fs_info, opt) \ __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \ BTRFS_MOUNT_##opt) static inline int btrfs_fs_closing(const struct btrfs_fs_info *fs_info) { /* Do it this way so we only ever do one test_bit in the normal case. */ if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) return 2; return 1; } return 0; } /* * If we remount the fs to be R/O or umount the fs, the cleaner needn't do * anything except sleeping. This function is used to check the status of * the fs. * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, * since setting and checking for SB_RDONLY in the superblock's flags is not * atomic. */ static inline int btrfs_need_cleaner_sleep(const struct btrfs_fs_info *fs_info) { return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || btrfs_fs_closing(fs_info); } static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) { clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags); } #define BTRFS_FS_ERROR(fs_info) (READ_ONCE((fs_info)->fs_error)) #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ &(fs_info)->fs_state))) #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #define EXPORT_FOR_TESTS static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info) { return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); } void btrfs_test_destroy_inode(struct inode *inode); #else #define EXPORT_FOR_TESTS static static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info) { return 0; } #endif #endif
12 12 12 12 12 12 11 8 8 8 3 3 2 2 2 2 2 2 1 2 2 1 2 12 1 11 11 2 13 2 2 2 13 2 2 2 2 2 2 2 2 2 2 13 11 2 2 2 2 2 13 13 13 13 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bio.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/statfs.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/kthread.h> #include <linux/delay.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/time.h> #include <linux/wait.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/kernel.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" #include "dir.h" #include "glock.h" #include "glops.h" #include "inode.h" #include "log.h" #include "meta_io.h" #include "quota.h" #include "recovery.h" #include "rgrp.h" #include "super.h" #include "trans.h" #include "util.h" #include "sys.h" #include "xattr.h" #include "lops.h" enum dinode_demise { SHOULD_DELETE_DINODE, SHOULD_NOT_DELETE_DINODE, SHOULD_DEFER_EVICTION, }; /** * gfs2_jindex_free - Clear all the journal index information * @sdp: The GFS2 superblock * */ void gfs2_jindex_free(struct gfs2_sbd *sdp) { struct list_head list; struct gfs2_jdesc *jd; spin_lock(&sdp->sd_jindex_spin); list_add(&list, &sdp->sd_jindex_list); list_del_init(&sdp->sd_jindex_list); sdp->sd_journals = 0; spin_unlock(&sdp->sd_jindex_spin); down_write(&sdp->sd_log_flush_lock); sdp->sd_jdesc = NULL; up_write(&sdp->sd_log_flush_lock); while (!list_empty(&list)) { jd = list_first_entry(&list, struct gfs2_jdesc, jd_list); BUG_ON(jd->jd_log_bio); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); jd->jd_inode = NULL; kfree(jd); } } static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) { struct gfs2_jdesc *jd; list_for_each_entry(jd, head, jd_list) { if (jd->jd_jid == jid) return jd; } return NULL; } struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) { struct gfs2_jdesc *jd; spin_lock(&sdp->sd_jindex_spin); jd = jdesc_find_i(&sdp->sd_jindex_list, jid); spin_unlock(&sdp->sd_jindex_spin); return jd; } int gfs2_jdesc_check(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); u64 size = i_size_read(jd->jd_inode); if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30))) return -EIO; jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; if (gfs2_write_alloc_required(ip, 0, size)) { gfs2_consist_inode(ip); return -EIO; } return 0; } /** * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one * @sdp: the filesystem * * Returns: errno */ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; struct gfs2_log_header_host head; int error; j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); if (gfs2_withdrawing_or_withdrawn(sdp)) return -EIO; error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); if (error) { gfs2_consist(sdp); return error; } if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { gfs2_consist(sdp); return -EIO; } /* Initialize some head of the log stuff */ sdp->sd_log_sequence = head.lh_sequence + 1; gfs2_log_pointers_init(sdp, head.lh_blkno); error = gfs2_quota_init(sdp); if (!error && gfs2_withdrawing_or_withdrawn(sdp)) error = -EIO; if (!error) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); return error; } void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; sc->sc_total = be64_to_cpu(str->sc_total); sc->sc_free = be64_to_cpu(str->sc_free); sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); } void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) { struct gfs2_statfs_change *str = buf; str->sc_total = cpu_to_be64(sc->sc_total); str->sc_free = cpu_to_be64(sc->sc_free); str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); } int gfs2_statfs_init(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct buffer_head *m_bh; struct gfs2_holder gh; int error; error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) return error; error = gfs2_meta_inode_buffer(m_ip, &m_bh); if (error) goto out; if (sdp->sd_args.ar_spectator) { spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); spin_unlock(&sdp->sd_statfs_spin); } else { spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode)); spin_unlock(&sdp->sd_statfs_spin); } brelse(m_bh); out: gfs2_glock_dq_uninit(&gh); return 0; } void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes) { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; s64 x, y; int need_sync = 0; gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh); spin_lock(&sdp->sd_statfs_spin); l_sc->sc_total += total; l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode)); if (sdp->sd_args.ar_statfs_percent) { x = 100 * l_sc->sc_free; y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent; if (x >= y || x <= -y) need_sync = 1; } spin_unlock(&sdp->sd_statfs_spin); if (need_sync) gfs2_wake_up_statfs(sdp); } void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh); gfs2_trans_add_meta(m_ip->i_gl, m_bh); spin_lock(&sdp->sd_statfs_spin); m_sc->sc_total += l_sc->sc_total; m_sc->sc_free += l_sc->sc_free; m_sc->sc_dinodes += l_sc->sc_dinodes; memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode), 0, sizeof(struct gfs2_statfs_change)); gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); spin_unlock(&sdp->sd_statfs_spin); } int gfs2_statfs_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_holder gh; struct buffer_head *m_bh; int error; error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) goto out; error = gfs2_meta_inode_buffer(m_ip, &m_bh); if (error) goto out_unlock; spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { spin_unlock(&sdp->sd_statfs_spin); goto out_bh; } spin_unlock(&sdp->sd_statfs_spin); error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); if (error) goto out_bh; update_statfs(sdp, m_bh); sdp->sd_statfs_force_sync = 0; gfs2_trans_end(sdp); out_bh: brelse(m_bh); out_unlock: gfs2_glock_dq_uninit(&gh); out: return error; } struct lfcc { struct list_head list; struct gfs2_holder gh; }; /** * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all * journals are clean * @sdp: the file system * * Returns: errno */ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; int error, error2; /* * Grab all the journal glocks in SH mode. We are *probably* doing * that to prevent recovery. */ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { error = -ENOMEM; goto out; } ip = GFS2_I(jd->jd_inode); error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh); if (error) { kfree(lfcc); goto out; } list_add(&lfcc->list, &list); } gfs2_freeze_unlock(sdp); error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOPID, &sdp->sd_freeze_gh); if (error) goto relock_shared; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); if (error) break; error = gfs2_find_jhead(jd, &lh, false); if (error) break; if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { error = -EBUSY; break; } } if (!error) goto out; /* success */ gfs2_freeze_unlock(sdp); relock_shared: error2 = gfs2_freeze_lock_shared(sdp); gfs2_assert_withdraw(sdp, !error2); out: while (!list_empty(&list)) { lfcc = list_first_entry(&list, struct lfcc, list); list_del(&lfcc->list); gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } return error; } void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { const struct inode *inode = &ip->i_inode; struct gfs2_dinode *str = buf; str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(inode->i_mode); str->di_uid = cpu_to_be32(i_uid_read(inode)); str->di_gid = cpu_to_be32(i_gid_read(inode)); str->di_nlink = cpu_to_be32(inode->i_nlink); str->di_size = cpu_to_be64(i_size_read(inode)); str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); str->di_atime = cpu_to_be64(inode_get_atime_sec(inode)); str->di_mtime = cpu_to_be64(inode_get_mtime_sec(inode)); str->di_ctime = cpu_to_be64(inode_get_ctime_sec(inode)); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); str->di_generation = cpu_to_be64(ip->i_generation); str->di_flags = cpu_to_be32(ip->i_diskflags); str->di_height = cpu_to_be16(ip->i_height); str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && !(ip->i_diskflags & GFS2_DIF_EXHASH) ? GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(ip->i_depth); str->di_entries = cpu_to_be32(ip->i_entries); str->di_eattr = cpu_to_be64(ip->i_eattr); str->di_atime_nsec = cpu_to_be32(inode_get_atime_nsec(inode)); str->di_mtime_nsec = cpu_to_be32(inode_get_mtime_nsec(inode)); str->di_ctime_nsec = cpu_to_be32(inode_get_ctime_nsec(inode)); } /** * gfs2_write_inode - Make sure the inode is stable on the disk * @inode: The inode * @wbc: The writeback control structure * * Returns: errno */ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); struct backing_dev_info *bdi = inode_to_bdi(metamapping->host); int ret = 0; bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip)); if (flush_all) gfs2_log_flush(GFS2_SB(inode), ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_WRITE_INODE); if (bdi->wb.dirty_exceeded) gfs2_ail1_flush(sdp, wbc); else filemap_fdatawrite(metamapping); if (flush_all) ret = filemap_fdatawait(metamapping); if (ret) mark_inode_dirty_sync(inode); else { spin_lock(&inode->i_lock); if (!(inode->i_flags & I_DIRTY)) gfs2_ordered_del_inode(ip); spin_unlock(&inode->i_lock); } return ret; } /** * gfs2_dirty_inode - check for atime updates * @inode: The inode in question * @flags: The type of dirty * * Unfortunately it can be called under any combination of inode * glock and freeze glock, so we have to check carefully. * * At the moment this deals only with atime - it should be possible * to expand that role in future, once a review of the locking has * been carried out. */ static void gfs2_dirty_inode(struct inode *inode, int flags) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; struct gfs2_holder gh; int need_unlock = 0; int need_endtrans = 0; int ret; if (unlikely(!ip->i_gl)) { /* This can only happen during incomplete inode creation. */ BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); return; } if (gfs2_withdrawing_or_withdrawn(sdp)) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) { fs_err(sdp, "dirty_inode: glock %d\n", ret); gfs2_dump_glock(NULL, ip->i_gl, true); return; } need_unlock = 1; } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) return; if (current->journal_info == NULL) { ret = gfs2_trans_begin(sdp, RES_DINODE, 0); if (ret) { fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret); goto out; } need_endtrans = 1; } ret = gfs2_meta_inode_buffer(ip, &bh); if (ret == 0) { gfs2_trans_add_meta(ip->i_gl, bh); gfs2_dinode_out(ip, bh->b_data); brelse(bh); } if (need_endtrans) gfs2_trans_end(sdp); out: if (need_unlock) gfs2_glock_dq_uninit(&gh); } /** * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one * @sdp: the filesystem * * Returns: errno */ void gfs2_make_fs_ro(struct gfs2_sbd *sdp) { int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (!test_bit(SDF_KILL, &sdp->sd_flags)) gfs2_flush_delete_work(sdp); gfs2_destroy_threads(sdp); if (log_write_allowed) { gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); /* We do two log flushes here. The first one commits dirty inodes * and rgrps to the journal, but queues up revokes to the ail list. * The second flush writes out and removes the revokes. * * The first must be done before the FLUSH_SHUTDOWN code * clears the LIVE flag, otherwise it will not be able to start * a transaction to write its revokes, and the error will cause * a withdraw of the file system. */ gfs2_log_flush(sdp, NULL, GFS2_LFC_MAKE_FS_RO); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LFC_MAKE_FS_RO); wait_event_timeout(sdp->sd_log_waitq, gfs2_log_is_empty(sdp), HZ * 5); gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp)); } gfs2_quota_cleanup(sdp); } /** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock * */ static void gfs2_put_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_jdesc *jd; /* No more recovery requests */ set_bit(SDF_NORECOVERY, &sdp->sd_flags); smp_mb(); /* Wait on outstanding recovery */ restart: spin_lock(&sdp->sd_jindex_spin); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) continue; spin_unlock(&sdp->sd_jindex_spin); wait_on_bit(&jd->jd_flags, JDF_RECOVERY, TASK_UNINTERRUPTIBLE); goto restart; } spin_unlock(&sdp->sd_jindex_spin); if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); else { if (gfs2_withdrawing_or_withdrawn(sdp)) gfs2_destroy_threads(sdp); gfs2_quota_cleanup(sdp); } WARN_ON(gfs2_withdrawing(sdp)); /* At this point, we're through modifying the disk */ /* Release stuff */ gfs2_freeze_unlock(sdp); iput(sdp->sd_jindex); iput(sdp->sd_statfs_inode); iput(sdp->sd_rindex); iput(sdp->sd_quota_inode); gfs2_glock_put(sdp->sd_rename_gl); gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { if (gfs2_holder_initialized(&sdp->sd_journal_gh)) gfs2_glock_dq_uninit(&sdp->sd_journal_gh); if (gfs2_holder_initialized(&sdp->sd_jinode_gh)) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); brelse(sdp->sd_sc_bh); gfs2_glock_dq_uninit(&sdp->sd_sc_gh); gfs2_glock_dq_uninit(&sdp->sd_qc_gh); free_local_statfs_inodes(sdp); iput(sdp->sd_qc_inode); } gfs2_glock_dq_uninit(&sdp->sd_live_gh); gfs2_clear_rgrpd(sdp); gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); gfs2_sys_fs_del(sdp); free_sbd(sdp); } /** * gfs2_sync_fs - sync the filesystem * @sb: the superblock * @wait: true to wait for completion * * Flushes the log to disk. */ static int gfs2_sync_fs(struct super_block *sb, int wait) { struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); if (wait) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_SYNC_FS); return sdp->sd_log_error; } static int gfs2_do_thaw(struct gfs2_sbd *sdp) { struct super_block *sb = sdp->sd_vfs; int error; error = gfs2_freeze_lock_shared(sdp); if (error) goto fail; error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); if (!error) return 0; fail: fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error); gfs2_assert_withdraw(sdp, 0); return error; } void gfs2_freeze_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); struct super_block *sb = sdp->sd_vfs; int error; mutex_lock(&sdp->sd_freeze_mutex); error = -EBUSY; if (test_bit(SDF_FROZEN, &sdp->sd_flags)) goto freeze_failed; error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); if (error) goto freeze_failed; gfs2_freeze_unlock(sdp); set_bit(SDF_FROZEN, &sdp->sd_flags); error = gfs2_do_thaw(sdp); if (error) goto out; clear_bit(SDF_FROZEN, &sdp->sd_flags); goto out; freeze_failed: fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); out: mutex_unlock(&sdp->sd_freeze_mutex); deactivate_super(sb); } /** * gfs2_freeze_super - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem * */ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; if (!mutex_trylock(&sdp->sd_freeze_mutex)) return -EBUSY; if (test_bit(SDF_FROZEN, &sdp->sd_flags)) { mutex_unlock(&sdp->sd_freeze_mutex); return -EBUSY; } for (;;) { error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); if (error) { fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); goto out; } error = gfs2_lock_fs_check_clean(sdp); if (!error) { set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); set_bit(SDF_FROZEN, &sdp->sd_flags); break; } error = gfs2_do_thaw(sdp); if (error) goto out; if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); else if (error == -EIO) { fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due " "to recovery error.\n"); goto out; } else { fs_err(sdp, "error freezing FS: %d\n", error); } fs_err(sdp, "retrying...\n"); msleep(1000); } out: mutex_unlock(&sdp->sd_freeze_mutex); return error; } static int gfs2_freeze_fs(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); if (gfs2_withdrawing_or_withdrawn(sdp)) return -EIO; } return 0; } /** * gfs2_thaw_super - reallow writes to the filesystem * @sb: the VFS structure for the filesystem * */ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; if (!mutex_trylock(&sdp->sd_freeze_mutex)) return -EBUSY; if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) { mutex_unlock(&sdp->sd_freeze_mutex); return -EINVAL; } atomic_inc(&sb->s_active); gfs2_freeze_unlock(sdp); error = gfs2_do_thaw(sdp); if (!error) { clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); clear_bit(SDF_FROZEN, &sdp->sd_flags); } mutex_unlock(&sdp->sd_freeze_mutex); deactivate_super(sb); return error; } void gfs2_thaw_freeze_initiator(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; mutex_lock(&sdp->sd_freeze_mutex); if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) goto out; gfs2_freeze_unlock(sdp); out: mutex_unlock(&sdp->sd_freeze_mutex); } /** * statfs_slow_fill - fill in the sg for a given RG * @rgd: the RG * @sc: the sc structure * * Returns: 0 on success, -ESTALE if the LVB is invalid */ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, struct gfs2_statfs_change_host *sc) { gfs2_rgrp_verify(rgd); sc->sc_total += rgd->rd_data; sc->sc_free += rgd->rd_free; sc->sc_dinodes += rgd->rd_dinodes; return 0; } /** * gfs2_statfs_slow - Stat a filesystem using asynchronous locking * @sdp: the filesystem * @sc: the sc info that will be returned * * Any error (other than a signal) will cause this routine to fall back * to the synchronous version. * * FIXME: This really shouldn't busy wait like this. * * Returns: errno */ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { struct gfs2_rgrpd *rgd_next; struct gfs2_holder *gha, *gh; unsigned int slots = 64; unsigned int x; int done; int error = 0, err; memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL); if (!gha) return -ENOMEM; for (x = 0; x < slots; x++) gfs2_holder_mark_uninitialized(gha + x); rgd_next = gfs2_rgrpd_get_first(sdp); for (;;) { done = 1; for (x = 0; x < slots; x++) { gh = gha + x; if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) { err = gfs2_glock_wait(gh); if (err) { gfs2_holder_uninit(gh); error = err; } else { if (!error) { struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gh->gh_gl); error = statfs_slow_fill(rgd, sc); } gfs2_glock_dq_uninit(gh); } } if (gfs2_holder_initialized(gh)) done = 0; else if (rgd_next && !error) { error = gfs2_glock_nq_init(rgd_next->rd_gl, LM_ST_SHARED, GL_ASYNC, gh); rgd_next = gfs2_rgrpd_get_next(rgd_next); done = 0; } if (signal_pending(current)) error = -ERESTARTSYS; } if (done) break; yield(); } kfree(gha); return error; } /** * gfs2_statfs_i - Do a statfs * @sdp: the filesystem * @sc: the sc structure * * Returns: errno */ static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; spin_lock(&sdp->sd_statfs_spin); *sc = *m_sc; sc->sc_total += l_sc->sc_total; sc->sc_free += l_sc->sc_free; sc->sc_dinodes += l_sc->sc_dinodes; spin_unlock(&sdp->sd_statfs_spin); if (sc->sc_free < 0) sc->sc_free = 0; if (sc->sc_free > sc->sc_total) sc->sc_free = sc->sc_total; if (sc->sc_dinodes < 0) sc->sc_dinodes = 0; return 0; } /** * gfs2_statfs - Gather and return stats about the filesystem * @dentry: The name of the link * @buf: The buffer * * Returns: 0 on success or error code */ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_statfs_change_host sc; int error; error = gfs2_rindex_update(sdp); if (error) return error; if (gfs2_tune_get(sdp, gt_statfs_slow)) error = gfs2_statfs_slow(sdp, &sc); else error = gfs2_statfs_i(sdp, &sc); if (error) return error; buf->f_type = GFS2_MAGIC; buf->f_bsize = sdp->sd_sb.sb_bsize; buf->f_blocks = sc.sc_total; buf->f_bfree = sc.sc_free; buf->f_bavail = sc.sc_free; buf->f_files = sc.sc_dinodes + sc.sc_free; buf->f_ffree = sc.sc_free; buf->f_namelen = GFS2_FNAMESIZE; buf->f_fsid = uuid_to_fsid(sb->s_uuid.b); return 0; } /** * gfs2_drop_inode - Drop an inode (test for remote unlink) * @inode: The inode to drop * * If we've received a callback on an iopen lock then it's because a * remote node tried to deallocate the inode but failed due to this node * still having the inode open. Here we mark the link count zero * since we know that it must have reached zero if the GLF_DEMOTE flag * is set on the iopen glock. If we didn't do a disk read since the * remote node removed the final link then we might otherwise miss * this event. This check ensures that this node will deallocate the * inode's blocks, or alternatively pass the baton on to another * node for later deallocation. */ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); } /* * When under memory pressure when an inode's link count has dropped to * zero, defer deleting the inode to the delete workqueue. This avoids * calling into DLM under memory pressure, which can deadlock. */ if (!inode->i_nlink && unlikely(current->flags & PF_MEMALLOC) && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; gfs2_glock_hold(gl); if (!gfs2_queue_try_to_evict(gl)) gfs2_glock_put_async(gl); return 0; } /* * No longer cache inodes when trying to evict them all. */ if (test_bit(SDF_EVICTING, &sdp->sd_flags)) return 1; return generic_drop_inode(inode); } /** * gfs2_show_options - Show mount options for /proc/mounts * @s: seq_file structure * @root: root of this (sub)tree * * Returns: 0 on success or error code */ static int gfs2_show_options(struct seq_file *s, struct dentry *root) { struct gfs2_sbd *sdp = root->d_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum; spin_lock(&sdp->sd_tune.gt_spin); logd_secs = sdp->sd_tune.gt_logd_secs; quota_quantum = sdp->sd_tune.gt_quota_quantum; statfs_quantum = sdp->sd_tune.gt_statfs_quantum; statfs_slow = sdp->sd_tune.gt_statfs_slow; spin_unlock(&sdp->sd_tune.gt_spin); if (is_subdir(root, sdp->sd_master_dir)) seq_puts(s, ",meta"); if (args->ar_lockproto[0]) seq_show_option(s, "lockproto", args->ar_lockproto); if (args->ar_locktable[0]) seq_show_option(s, "locktable", args->ar_locktable); if (args->ar_hostdata[0]) seq_show_option(s, "hostdata", args->ar_hostdata); if (args->ar_spectator) seq_puts(s, ",spectator"); if (args->ar_localflocks) seq_puts(s, ",localflocks"); if (args->ar_debug) seq_puts(s, ",debug"); if (args->ar_posix_acl) seq_puts(s, ",acl"); if (args->ar_quota != GFS2_QUOTA_DEFAULT) { char *state; switch (args->ar_quota) { case GFS2_QUOTA_OFF: state = "off"; break; case GFS2_QUOTA_ACCOUNT: state = "account"; break; case GFS2_QUOTA_ON: state = "on"; break; case GFS2_QUOTA_QUIET: state = "quiet"; break; default: state = "unknown"; break; } seq_printf(s, ",quota=%s", state); } if (args->ar_suiddir) seq_puts(s, ",suiddir"); if (args->ar_data != GFS2_DATA_DEFAULT) { char *state; switch (args->ar_data) { case GFS2_DATA_WRITEBACK: state = "writeback"; break; case GFS2_DATA_ORDERED: state = "ordered"; break; default: state = "unknown"; break; } seq_printf(s, ",data=%s", state); } if (args->ar_discard) seq_puts(s, ",discard"); if (logd_secs != 30) seq_printf(s, ",commit=%d", logd_secs); if (statfs_quantum != 30) seq_printf(s, ",statfs_quantum=%d", statfs_quantum); else if (statfs_slow) seq_puts(s, ",statfs_quantum=0"); if (quota_quantum != 60) seq_printf(s, ",quota_quantum=%d", quota_quantum); if (args->ar_statfs_percent) seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if (args->ar_errors != GFS2_ERRORS_DEFAULT) { const char *state; switch (args->ar_errors) { case GFS2_ERRORS_WITHDRAW: state = "withdraw"; break; case GFS2_ERRORS_PANIC: state = "panic"; break; default: state = "unknown"; break; } seq_printf(s, ",errors=%s", state); } if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) seq_puts(s, ",nobarrier"); if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) seq_puts(s, ",demote_interface_used"); if (args->ar_rgrplvb) seq_puts(s, ",rgrplvb"); if (args->ar_loccookie) seq_puts(s, ",loccookie"); return 0; } static void gfs2_final_release_pages(struct gfs2_inode *ip) { struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; if (unlikely(!gl)) { /* This can only happen during incomplete inode creation. */ BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); return; } truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); if (atomic_read(&gl->gl_revokes) == 0) { clear_bit(GLF_LFLUSH, &gl->gl_flags); clear_bit(GLF_DIRTY, &gl->gl_flags); } } static int gfs2_dinode_dealloc(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; struct gfs2_holder gh; int error; if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { gfs2_consist_inode(ip); return -EIO; } gfs2_rindex_update(sdp); error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) return error; rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) { gfs2_consist_inode(ip); error = -EIO; goto out_qs; } error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, &gh); if (error) goto out_qs; error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, sdp->sd_jdesc->jd_blocks); if (error) goto out_rg_gunlock; gfs2_free_di(rgd, ip); gfs2_final_release_pages(ip); gfs2_trans_end(sdp); out_rg_gunlock: gfs2_glock_dq_uninit(&gh); out_qs: gfs2_quota_unhold(ip); return error; } /** * gfs2_glock_put_eventually * @gl: The glock to put * * When under memory pressure, trigger a deferred glock put to make sure we * won't call into DLM and deadlock. Otherwise, put the glock directly. */ static void gfs2_glock_put_eventually(struct gfs2_glock *gl) { if (current->flags & PF_MEMALLOC) gfs2_glock_put_async(gl); else gfs2_glock_put(gl); } static bool gfs2_upgrade_iopen_glock(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder *gh = &ip->i_iopen_gh; int error; gh->gh_flags |= GL_NOCACHE; gfs2_glock_dq_wait(gh); /* * If there are no other lock holders, we will immediately get * exclusive access to the iopen glock here. * * Otherwise, the other nodes holding the lock will be notified about * our locking request. If they do not have the inode open, they are * expected to evict the cached inode and release the lock, allowing us * to proceed. * * Otherwise, if they cannot evict the inode, they are expected to poke * the inode glock (note: not the iopen glock). We will notice that * and stop waiting for the iopen glock immediately. The other node(s) * are then expected to take care of deleting the inode when they no * longer use it. * * As a last resort, if another node keeps holding the iopen glock * without showing any activity on the inode glock, we will eventually * time out and fail the iopen glock upgrade. */ gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) return false; wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); return false; } return gfs2_glock_holder_ready(gh) == 0; } /** * evict_should_delete - determine whether the inode is eligible for deletion * @inode: The inode to evict * @gh: The glock holder structure * * This function determines whether the evicted inode is eligible to be deleted * and locks the inode glock. * * Returns: the fate of the dinode */ static enum dinode_demise evict_should_delete(struct inode *inode, struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; int ret; if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) return SHOULD_DEFER_EVICTION; /* Deletes should never happen under memory pressure anymore. */ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) return SHOULD_DEFER_EVICTION; /* Must not read inode block until block type has been verified */ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh); if (unlikely(ret)) { glock_clear_object(ip->i_iopen_gh.gh_gl, ip); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); return SHOULD_DEFER_EVICTION; } if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino)) return SHOULD_NOT_DELETE_DINODE; ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); if (ret) return SHOULD_NOT_DELETE_DINODE; ret = gfs2_instantiate(gh); if (ret) return SHOULD_NOT_DELETE_DINODE; /* * The inode may have been recreated in the meantime. */ if (inode->i_nlink) return SHOULD_NOT_DELETE_DINODE; should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { if (!gfs2_upgrade_iopen_glock(inode)) { gfs2_holder_uninit(&ip->i_iopen_gh); return SHOULD_NOT_DELETE_DINODE; } } return SHOULD_DELETE_DINODE; } /** * evict_unlinked_inode - delete the pieces of an unlinked evicted inode * @inode: The inode to evict */ static int evict_unlinked_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); int ret; if (S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { ret = gfs2_dir_exhash_dealloc(ip); if (ret) goto out; } if (ip->i_eattr) { ret = gfs2_ea_dealloc(ip); if (ret) goto out; } if (!gfs2_is_stuffed(ip)) { ret = gfs2_file_dealloc(ip); if (ret) goto out; } /* * As soon as we clear the bitmap for the dinode, gfs2_create_inode() * can get called to recreate it, or even gfs2_inode_lookup() if the * inode was recreated on another node in the meantime. * * However, inserting the new inode into the inode hash table will not * succeed until the old inode is removed, and that only happens after * ->evict_inode() returns. The new inode is attached to its inode and * iopen glocks after inserting it into the inode hash table, so at * that point we can be sure that both glocks are unused. */ ret = gfs2_dinode_dealloc(ip); if (!ret && ip->i_gl) gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); out: return ret; } /* * evict_linked_inode - evict an inode whose dinode has not been unlinked * @inode: The inode to evict */ static int evict_linked_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct address_space *metamapping; int ret; gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_EVICT_INODE); metamapping = gfs2_glock2aspace(ip->i_gl); if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { filemap_fdatawrite(metamapping); filemap_fdatawait(metamapping); } write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0); ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (ret) return ret; /* Needs to be done before glock release & also in a transaction */ truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(metamapping, 0); gfs2_trans_end(sdp); return 0; } /** * gfs2_evict_inode - Remove an inode from cache * @inode: The inode to evict * * There are three cases to consider: * 1. i_nlink == 0, we are final opener (and must deallocate) * 2. i_nlink == 0, we are not the final opener (and cannot deallocate) * 3. i_nlink > 0 * * If the fs is read only, then we have to treat all cases as per #3 * since we are unable to do any deallocation. The inode will be * deallocated by the next read/write node to attempt an allocation * in the same resource group * * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared lock on the iopen lock and * taking the exclusive lock. I'd rather do a shared -> exclusive * conversion on the iopen lock, but we can change that later. This * is safe, just less efficient. */ static void gfs2_evict_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; /* * In case of an incomplete mount, gfs2_evict_inode() may be called for * system files without having an active journal to write to. In that * case, skip the filesystem evict. */ if (!sdp->sd_jdesc) goto out; gfs2_holder_mark_uninitialized(&gh); ret = evict_should_delete(inode, &gh); if (ret == SHOULD_DEFER_EVICTION) goto out; if (ret == SHOULD_DELETE_DINODE) ret = evict_unlinked_inode(inode); else ret = evict_linked_inode(inode); if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); if (ret && ret != GLR_TRYFAILED && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); if (gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); gfs2_glock_hold(gl); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); gfs2_glock_put_eventually(ip->i_gl); rcu_assign_pointer(ip->i_gl, NULL); } } static struct inode *gfs2_alloc_inode(struct super_block *sb) { struct gfs2_inode *ip; ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL); if (!ip) return NULL; ip->i_no_addr = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_rahead = 0; return &ip->i_inode; } static void gfs2_free_inode(struct inode *inode) { kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode)); } void free_local_statfs_inodes(struct gfs2_sbd *sdp) { struct local_statfs_inode *lsi, *safe; /* Run through the statfs inodes list to iput and free memory */ list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) { if (lsi->si_jid == sdp->sd_jdesc->jd_jid) sdp->sd_sc_inode = NULL; /* belongs to this node */ if (lsi->si_sc_inode) iput(lsi->si_sc_inode); list_del(&lsi->si_list); kfree(lsi); } } struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, unsigned int index) { struct local_statfs_inode *lsi; /* Return the local (per node) statfs inode in the * sdp->sd_sc_inodes_list corresponding to the 'index'. */ list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) { if (lsi->si_jid == index) return lsi->si_sc_inode; } return NULL; } const struct super_operations gfs2_super_ops = { .alloc_inode = gfs2_alloc_inode, .free_inode = gfs2_free_inode, .write_inode = gfs2_write_inode, .dirty_inode = gfs2_dirty_inode, .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, .freeze_super = gfs2_freeze_super, .freeze_fs = gfs2_freeze_fs, .thaw_super = gfs2_thaw_super, .statfs = gfs2_statfs, .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, };
17 18 39 39 15 1 15 15 15 6 5917 5912 5929 5913 6 5929 3414 3386 48 8 3411 45 8 9 28 12 26 4 19 10616 1396 11996 753 4546 4494 222 15 15 15 26 26 14 2996 84 88 87 90 29 88 45 45 25 1 24 1 23 12 12 12 20 4 15 12 1 6 16 9 9 9 13 12 13 15 2 1 1 1 1 1 1 2 1 2 1 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * Copyright (C) 2017-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005 * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. * * This driver produces cryptographically secure pseudorandom data. It is divided * into roughly six sections, each with a section header: * * - Initialization and readiness waiting. * - Fast key erasure RNG, the "crng". * - Entropy accumulation and extraction routines. * - Entropy collection routines. * - Userspace reader/writer interfaces. * - Sysctl interface. * * The high level overview is that there is one input pool, into which * various pieces of data are hashed. Prior to initialization, some of that * data is then "credited" as having a certain number of bits of entropy. * When enough bits of entropy are available, the hash is finalized and * handed as a key to a stream cipher that expands it indefinitely for * various consumers. This key is periodically refreshed as the various * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/utsname.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/nodemask.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/percpu.h> #include <linux/ptrace.h> #include <linux/workqueue.h> #include <linux/irq.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> #include <linux/completion.h> #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/suspend.h> #include <linux/siphash.h> #include <linux/sched/isolation.h> #include <crypto/chacha.h> #include <crypto/blake2s.h> #ifdef CONFIG_VDSO_GETRANDOM #include <vdso/getrandom.h> #include <vdso/datapage.h> #endif #include <asm/archrandom.h> #include <asm/processor.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/io.h> /********************************************************************* * * Initialization and readiness waiting. * * Much of the RNG infrastructure is devoted to various dependencies * being able to wait until the RNG has collected enough entropy and * is ready for safe consumption. * *********************************************************************/ /* * crng_init is protected by base_crng->lock, and only increases * its value (from empty->early->ready). */ static enum { CRNG_EMPTY = 0, /* Little to no entropy collected */ CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ } crng_init __read_mostly = CRNG_EMPTY; static DEFINE_STATIC_KEY_FALSE(crng_is_ready); #define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY) /* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); /* * Returns whether or not the input pool has been seeded and thus guaranteed * to supply cryptographically secure random numbers. This applies to: the * /dev/urandom device, the get_random_bytes function, and the get_random_{u8, * u16,u32,u64,long} family of functions. * * Returns: true if the input pool has been seeded. * false if the input pool has not been seeded. */ bool rng_is_initialized(void) { return crng_ready(); } EXPORT_SYMBOL(rng_is_initialized); static void __cold crng_set_ready(struct work_struct *work) { static_branch_enable(&crng_is_ready); } /* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ static void try_to_generate_entropy(void); /* * Wait for the input pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom * device, the get_random_bytes function, and the get_random_{u8,u16,u32,u64, * long} family of functions. Using any of these functions without first * calling this function forfeits the guarantee of security. * * Returns: 0 if the input pool has been seeded. * -ERESTARTSYS if the function was interrupted by a signal. */ int wait_for_random_bytes(void) { while (!crng_ready()) { int ret; try_to_generate_entropy(); ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); if (ret) return ret > 0 ? 0 : ret; } return 0; } EXPORT_SYMBOL(wait_for_random_bytes); /* * Add a callback function that will be invoked when the crng is initialised, * or immediately if it already has been. Only use this is you are absolutely * sure it is required. Most users should instead be able to test * `rng_is_initialized()` on demand, or make use of `get_random_bytes_wait()`. */ int __cold execute_with_initialized_rng(struct notifier_block *nb) { unsigned long flags; int ret = 0; spin_lock_irqsave(&random_ready_notifier.lock, flags); if (crng_ready()) nb->notifier_call(nb, 0, NULL); else ret = raw_notifier_chain_register((struct raw_notifier_head *)&random_ready_notifier.head, nb); spin_unlock_irqrestore(&random_ready_notifier.lock, flags); return ret; } #define warn_unseeded_randomness() \ if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ __func__, (void *)_RET_IP_, crng_init) /********************************************************************* * * Fast key erasure RNG, the "crng". * * These functions expand entropy from the entropy extractor into * long streams for external consumption using the "fast key erasure" * RNG described at <https://blog.cr.yp.to/20170723-random.html>. * * There are a few exported interfaces for use by other drivers: * * void get_random_bytes(void *buf, size_t len) * u8 get_random_u8() * u16 get_random_u16() * u32 get_random_u32() * u32 get_random_u32_below(u32 ceil) * u32 get_random_u32_above(u32 floor) * u32 get_random_u32_inclusive(u32 floor, u32 ceil) * u64 get_random_u64() * unsigned long get_random_long() * * These interfaces will return the requested number of random bytes * into the given buffer or as a return value. This is equivalent to * a read from /dev/urandom. The u8, u16, u32, u64, long family of * functions may be higher performance for one-off random integers, * because they do a bit of buffering and do not invoke reseeding * until the buffer is emptied. * *********************************************************************/ enum { CRNG_RESEED_START_INTERVAL = HZ, CRNG_RESEED_INTERVAL = 60 * HZ }; static struct { u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); unsigned long generation; spinlock_t lock; } base_crng = { .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) }; struct crng { u8 key[CHACHA_KEY_SIZE]; unsigned long generation; local_lock_t lock; }; static DEFINE_PER_CPU(struct crng, crngs) = { .generation = ULONG_MAX, .lock = INIT_LOCAL_LOCK(crngs.lock), }; /* * Return the interval until the next reseeding, which is normally * CRNG_RESEED_INTERVAL, but during early boot, it is at an interval * proportional to the uptime. */ static unsigned int crng_reseed_interval(void) { static bool early_boot = true; if (unlikely(READ_ONCE(early_boot))) { time64_t uptime = ktime_get_seconds(); if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) WRITE_ONCE(early_boot, false); else return max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } return CRNG_RESEED_INTERVAL; } /* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ static void extract_entropy(void *buf, size_t len); /* This extracts a new crng key from the input pool. */ static void crng_reseed(struct work_struct *work) { static DECLARE_DELAYED_WORK(next_reseed, crng_reseed); unsigned long flags; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; /* Immediately schedule the next reseeding, so that it fires sooner rather than later. */ if (likely(system_unbound_wq)) queue_delayed_work(system_unbound_wq, &next_reseed, crng_reseed_interval()); extract_entropy(key, sizeof(key)); /* * We copy the new key into the base_crng, overwriting the old one, * and update the generation counter. We avoid hitting ULONG_MAX, * because the per-cpu crngs are initialized to ULONG_MAX, so this * forces new CPUs that come online to always initialize. */ spin_lock_irqsave(&base_crng.lock, flags); memcpy(base_crng.key, key, sizeof(base_crng.key)); next_gen = base_crng.generation + 1; if (next_gen == ULONG_MAX) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); #ifdef CONFIG_VDSO_GETRANDOM /* base_crng.generation's invalid value is ULONG_MAX, while * _vdso_rng_data.generation's invalid value is 0, so add one to the * former to arrive at the latter. Use smp_store_release so that this * is ordered with the write above to base_crng.generation. Pairs with * the smp_rmb() before the syscall in the vDSO code. */ smp_store_release(&_vdso_rng_data.generation, next_gen + 1); #endif if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); } /* * This generates a ChaCha block using the provided key, and then * immediately overwrites that key with half the block. It returns * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. * * The returned ChaCha state contains within it a copy of the old * key value, at index 4, so the state should always be zeroed out * immediately after using in order to maintain forward secrecy. * If the state cannot be erased in a timely manner, then it is * safer to set the random_data parameter to &chacha_state[4] so * that this function overwrites it before returning. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { u8 first_block[CHACHA_BLOCK_SIZE]; BUG_ON(random_data_len > 32); chacha_init_consts(chacha_state); memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); memset(&chacha_state[12], 0, sizeof(u32) * 4); chacha20_block(chacha_state, first_block); memcpy(key, first_block, CHACHA_KEY_SIZE); memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); } /* * This function returns a ChaCha state that you may use for generating * random data. It also returns up to 32 bytes on its own of random data * that may be used; random_data_len may not be greater than 32. */ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { unsigned long flags; struct crng *crng; BUG_ON(random_data_len > 32); /* * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, extracting * when crng_init is CRNG_EMPTY. */ if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); if (!ready) { if (crng_init == CRNG_EMPTY) extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); } spin_unlock_irqrestore(&base_crng.lock, flags); if (!ready) return; } local_lock_irqsave(&crngs.lock, flags); crng = raw_cpu_ptr(&crngs); /* * If our per-cpu crng is older than the base_crng, then it means * somebody reseeded the base_crng. In that case, we do fast key * erasure on the base_crng, and use its output as the new key * for our per-cpu crng. This brings us up to date with base_crng. */ if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { spin_lock(&base_crng.lock); crng_fast_key_erasure(base_crng.key, chacha_state, crng->key, sizeof(crng->key)); crng->generation = base_crng.generation; spin_unlock(&base_crng.lock); } /* * Finally, when we've made it this far, our per-cpu crng has an up * to date key, and we can do fast key erasure with it to produce * some random data and a ChaCha state for the caller. All other * branches of this function are "unlikely", so most of the time we * should wind up here immediately. */ crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); local_unlock_irqrestore(&crngs.lock, flags); } static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; size_t first_block_len; if (!len) return; first_block_len = min_t(size_t, 32, len); crng_make_state(chacha_state, buf, first_block_len); len -= first_block_len; buf += first_block_len; while (len) { if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp); memcpy(buf, tmp, len); memzero_explicit(tmp, sizeof(tmp)); break; } chacha20_block(chacha_state, buf); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len -= CHACHA_BLOCK_SIZE; buf += CHACHA_BLOCK_SIZE; } memzero_explicit(chacha_state, sizeof(chacha_state)); } /* * This returns random bytes in arbitrary quantities. The quality of the * random bytes is good as /dev/urandom. In order to ensure that the * randomness provided by this function is okay, the function * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ void get_random_bytes(void *buf, size_t len) { warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(struct iov_iter *iter) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 block[CHACHA_BLOCK_SIZE]; size_t ret = 0, copied; if (unlikely(!iov_iter_count(iter))) return 0; /* * Immediately overwrite the ChaCha key at index 4 with random * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); /* * However, if we're doing a read of len <= 32, we don't need to * use chacha_state after, so we can simply return those bytes to * the user directly. */ if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); goto out_zero_chacha; } for (;;) { chacha20_block(chacha_state, block); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; copied = copy_to_iter(block, sizeof(block), iter); ret += copied; if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); return ret ? ret : -EFAULT; } /* * Batched entropy returns random integers. The quality of the random * number is good as /dev/urandom. In order to ensure that the randomness * provided by this function is okay, the function wait_for_random_bytes() * should be called and return 0 at least once at any point prior. */ #define DEFINE_BATCHED_ENTROPY(type) \ struct batch_ ##type { \ /* \ * We make this 1.5x a ChaCha block, so that we get the \ * remaining 32 bytes from fast key erasure, plus one full \ * block from the detached ChaCha state. We can increase \ * the size of this later if needed so long as we keep the \ * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. \ */ \ type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ local_lock_t lock; \ unsigned long generation; \ unsigned int position; \ }; \ \ static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ .position = UINT_MAX \ }; \ \ type get_random_ ##type(void) \ { \ type ret; \ unsigned long flags; \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ warn_unseeded_randomness(); \ \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \ } \ \ local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ batch = raw_cpu_ptr(&batched_entropy_##type); \ \ next_gen = READ_ONCE(base_crng.generation); \ if (batch->position >= ARRAY_SIZE(batch->entropy) || \ next_gen != batch->generation) { \ _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ batch->position = 0; \ batch->generation = next_gen; \ } \ \ ret = batch->entropy[batch->position]; \ batch->entropy[batch->position] = 0; \ ++batch->position; \ local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ return ret; \ } \ EXPORT_SYMBOL(get_random_ ##type); DEFINE_BATCHED_ENTROPY(u8) DEFINE_BATCHED_ENTROPY(u16) DEFINE_BATCHED_ENTROPY(u32) DEFINE_BATCHED_ENTROPY(u64) u32 __get_random_u32_below(u32 ceil) { /* * This is the slow path for variable ceil. It is still fast, most of * the time, by doing traditional reciprocal multiplication and * opportunistically comparing the lower half to ceil itself, before * falling back to computing a larger bound, and then rejecting samples * whose lower half would indicate a range indivisible by ceil. The use * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable * in 32-bits. */ u32 rand = get_random_u32(); u64 mult; /* * This function is technically undefined for ceil == 0, and in fact * for the non-underscored constant version in the header, we build bug * on that. But for the non-constant case, it's convenient to have that * evaluate to being a straight call to get_random_u32(), so that * get_random_u32_inclusive() can work over its whole range without * undefined behavior. */ if (unlikely(!ceil)) return rand; mult = (u64)ceil * rand; if (unlikely((u32)mult < ceil)) { u32 bound = -ceil % ceil; while (unlikely((u32)mult < bound)) mult = (u64)ceil * get_random_u32(); } return mult >> 32; } EXPORT_SYMBOL(__get_random_u32_below); #ifdef CONFIG_SMP /* * This function is called when the CPU is coming up, with entry * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. */ int __cold random_prepare_cpu(unsigned int cpu) { /* * When the cpu comes back online, immediately invalidate both * the per-cpu crng and all batches, so that we serve fresh * randomness. */ per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; per_cpu_ptr(&batched_entropy_u8, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u16, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; return 0; } #endif /********************************************************************** * * Entropy accumulation and extraction routines. * * Callers may add entropy via: * * static void mix_pool_bytes(const void *buf, size_t len) * * After which, if added entropy should be credited: * * static void credit_init_bits(size_t bits) * * Finally, extract entropy via: * * static void extract_entropy(void *buf, size_t len) * **********************************************************************/ enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ }; static struct { struct blake2s_state hash; spinlock_t lock; unsigned int init_bits; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, .hash.outlen = BLAKE2S_HASH_SIZE, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; static void _mix_pool_bytes(const void *buf, size_t len) { blake2s_update(&input_pool.hash, buf, len); } /* * This function adds bytes into the input pool. It does not * update the initialization bit counter; the caller should call * credit_init_bits if this is appropriate. */ static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ static void extract_entropy(void *buf, size_t len) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; struct { unsigned long rdseed[32 / sizeof(long)]; size_t counter; } block; size_t i, longs; for (i = 0; i < ARRAY_SIZE(block.rdseed);) { longs = arch_get_random_seed_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } longs = arch_get_random_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } block.rdseed[i++] = random_get_entropy(); } spin_lock_irqsave(&input_pool.lock, flags); /* seed = HASHPRF(last_key, entropy_input) */ blake2s_final(&input_pool.hash, seed); /* next_key = HASHPRF(seed, RDSEED || 0) */ block.counter = 0; blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); spin_unlock_irqrestore(&input_pool.lock, flags); memzero_explicit(next_key, sizeof(next_key)); while (len) { i = min_t(size_t, len, BLAKE2S_HASH_SIZE); /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); len -= i; buf += i; } memzero_explicit(seed, sizeof(seed)); memzero_explicit(&block, sizeof(block)); } #define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) static void __cold _credit_init_bits(size_t bits) { static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; if (!bits) return; add = min_t(size_t, bits, POOL_BITS); orig = READ_ONCE(input_pool.init_bits); do { new = min_t(unsigned int, POOL_BITS, orig + add); } while (!try_cmpxchg(&input_pool.init_bits, &orig, new)); if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */ if (static_key_initialized && system_unbound_wq) queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); #ifdef CONFIG_VDSO_GETRANDOM WRITE_ONCE(_vdso_rng_data.is_ready, true); #endif wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); if (urandom_warning.missed) pr_notice("%d urandom warning(s) missed due to ratelimiting\n", urandom_warning.missed); } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_init = CRNG_EARLY; } spin_unlock_irqrestore(&base_crng.lock, flags); } } /********************************************************************** * * Entropy collection routines. * * The following exported functions are used for pushing entropy into * the above entropy accumulation routines: * * void add_device_randomness(const void *buf, size_t len); * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); * void add_bootloader_randomness(const void *buf, size_t len); * void add_vmfork_randomness(const void *unique_vm_id, size_t len); * void add_interrupt_randomness(int irq); * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that * is likely to differ between two devices (or possibly even per boot). * This would be things like MAC addresses or serial numbers, or the * read-out of the RTC. This does *not* credit any actual entropy to * the pool, but it initializes the pool to different values for devices * that might otherwise be identical and have very little entropy * available to them (particularly common in the embedded world). * * add_hwgenerator_randomness() is for true hardware RNGs, and will credit * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. * * add_bootloader_randomness() is called by bootloader drivers, such as EFI * and device tree, and credits its input depending on whether or not the * command line option 'random.trust_bootloader'. * * add_vmfork_randomness() adds a unique (but not necessarily secret) ID * representing the current instance of a VM to the pool, without crediting, * and then force-reseeds the crng so that it takes effect immediately. * * add_interrupt_randomness() uses the interrupt timing as random * inputs to the entropy pool. Using the cycle counters and the irq source * as inputs, it feeds the input pool roughly once a second or after 64 * interrupts, crediting 1 bit of entropy for whichever comes first. * * add_input_randomness() uses the input layer interrupt timing, as well * as the event type information from the hardware. * * add_disk_randomness() uses what amounts to the seek time of block * layer request events, on a per-disk_devt basis, as input to the * entropy pool. Note that high-speed solid state drives with very low * seek times do not make for good sources of entropy, as their seek * times are usually fairly consistent. * * The last two routines try to estimate how many bits of entropy * to credit. They do this by keeping track of the first and second * order deltas of the event timings. * **********************************************************************/ static bool trust_cpu __initdata = true; static bool trust_bootloader __initdata = true; static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } static int __init parse_trust_bootloader(char *arg) { return kstrtobool(arg, &trust_bootloader); } early_param("random.trust_cpu", parse_trust_cpu); early_param("random.trust_bootloader", parse_trust_bootloader); static int random_pm_notification(struct notifier_block *nb, unsigned long action, void *data) { unsigned long flags, entropy = random_get_entropy(); /* * Encode a representation of how long the system has been suspended, * in a way that is distinct from prior system suspends. */ ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() }; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&action, sizeof(action)); _mix_pool_bytes(stamps, sizeof(stamps)); _mix_pool_bytes(&entropy, sizeof(entropy)); spin_unlock_irqrestore(&input_pool.lock, flags); if (crng_ready() && (action == PM_RESTORE_PREPARE || (action == PM_POST_SUSPEND && !IS_ENABLED(CONFIG_PM_AUTOSLEEP) && !IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP)))) { crng_reseed(NULL); pr_notice("crng reseeded on system resumption\n"); } return 0; } static struct notifier_block pm_notifier = { .notifier_call = random_pm_notification }; /* * This is called extremely early, before time keeping functionality is * available, but arch randomness is. Interrupts are not yet enabled. */ void __init random_init_early(const char *command_line) { unsigned long entropy[BLAKE2S_BLOCK_SIZE / sizeof(long)]; size_t i, longs, arch_bits; #if defined(LATENT_ENTROPY_PLUGIN) static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif for (i = 0, arch_bits = sizeof(entropy) * 8; i < ARRAY_SIZE(entropy);) { longs = arch_get_random_seed_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } longs = arch_get_random_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } arch_bits -= sizeof(*entropy) * 8; ++i; } _mix_pool_bytes(init_utsname(), sizeof(*(init_utsname()))); _mix_pool_bytes(command_line, strlen(command_line)); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); else if (trust_cpu) _credit_init_bits(arch_bits); } /* * This is called a little bit after the prior function, and now there is * access to timestamps counters. Interrupts are not yet enabled. */ void __init random_init(void) { unsigned long entropy = random_get_entropy(); ktime_t now = ktime_get_real(); _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(&entropy, sizeof(entropy)); add_latent_entropy(); /* * If we were initialized by the cpu or bootloader before jump labels * or workqueues are initialized, then we should enable the static * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); WARN_ON(register_pm_notifier(&pm_notifier)); WARN(!entropy, "Missing cycle counter and fallback timer; RNG " "entropy collection will consequently suffer."); } /* * Add device- or boot-specific data to the input pool to help * initialize it. * * None of this adds any entropy; it is meant to avoid the problem of * the entropy pool having similar initial state across largely * identical devices. */ void add_device_randomness(const void *buf, size_t len) { unsigned long entropy = random_get_entropy(); unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); /* * Interface for in-kernel drivers of true hardware RNGs. Those devices * may produce endless random bits, so this function will sleep for * some amount of time after, if the sleep_after parameter is true. */ void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after) { mix_pool_bytes(buf, len); credit_init_bits(entropy); /* * Throttle writing to once every reseed interval, unless we're not yet * initialized or no entropy is credited. */ if (sleep_after && !kthread_should_stop() && (crng_ready() || !entropy)) schedule_timeout_interruptible(crng_reseed_interval()); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* * Handle random seed passed by bootloader, and credit it depending * on the command line option 'random.trust_bootloader'. */ void __init add_bootloader_randomness(const void *buf, size_t len) { mix_pool_bytes(buf, len); if (trust_bootloader) credit_init_bits(len * 8); } #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); /* * Handle a new unique VM ID, which is unique, not secret, so we * don't credit it, but we do immediately force a reseed after so * that it's used by the crng posthaste. */ void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len) { add_device_randomness(unique_vm_id, len); if (crng_ready()) { crng_reseed(NULL); pr_notice("crng reseeded due to virtual machine fork\n"); } blocking_notifier_call_chain(&vmfork_chain, 0, NULL); } #if IS_MODULE(CONFIG_VMGENID) EXPORT_SYMBOL_GPL(add_vmfork_randomness); #endif int __cold register_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); int __cold unregister_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier); #endif struct fast_pool { unsigned long pool[4]; unsigned long last; unsigned int count; struct timer_list mix; }; static void mix_interrupt_randomness(struct timer_list *work); static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { #ifdef CONFIG_64BIT #define FASTMIX_PERM SIPHASH_PERMUTATION .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 }, #else #define FASTMIX_PERM HSIPHASH_PERMUTATION .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 }, #endif .mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0) }; /* * This is [Half]SipHash-1-x, starting from an empty key. Because * the key is fixed, it assumes that its inputs are non-malicious, * and therefore this has no security on its own. s represents the * four-word SipHash state, while v represents a two-word input. */ static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { s[3] ^= v1; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v1; s[3] ^= v2; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v2; } #ifdef CONFIG_SMP /* * This function is called when the CPU has just come online, with * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. */ int __cold random_online_cpu(unsigned int cpu) { /* * During CPU shutdown and before CPU onlining, add_interrupt_ * randomness() may schedule mix_interrupt_randomness(), and * set the MIX_INFLIGHT flag. However, because the worker can * be scheduled on a different CPU during this period, that * flag will never be cleared. For that reason, we zero out * the flag here, which runs just after workqueues are onlined * for the CPU again. This also has the effect of setting the * irq randomness count to zero so that new accumulated irqs * are fresh. */ per_cpu_ptr(&irq_randomness, cpu)->count = 0; return 0; } #endif static void mix_interrupt_randomness(struct timer_list *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* * The size of the copied stack pool is explicitly 2 longs so that we * only ever ingest half of the siphash output each time, retaining * the other half as the next "key" that carries over. The entropy is * supposed to be sufficiently dispersed between bits so on average * we don't wind up "losing" some. */ unsigned long pool[2]; unsigned int count; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); if (fast_pool != this_cpu_ptr(&irq_randomness)) { local_irq_enable(); return; } /* * Copy the pool to the stack so that the mixer always has a * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool, sizeof(pool)); count = fast_pool->count; fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); mix_pool_bytes(pool, sizeof(pool)); credit_init_bits(clamp_t(unsigned int, (count & U16_MAX) / 64, 1, sizeof(pool) * 8)); memzero_explicit(pool, sizeof(pool)); } void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; unsigned long entropy = random_get_entropy(); struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; fast_mix(fast_pool->pool, entropy, (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) return; if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; fast_pool->count |= MIX_INFLIGHT; if (!timer_pending(&fast_pool->mix)) { fast_pool->mix.expires = jiffies; add_timer_on(&fast_pool->mix, raw_smp_processor_id()); } } EXPORT_SYMBOL_GPL(add_interrupt_randomness); /* There is one of these per entropy source */ struct timer_rand_state { unsigned long last_time; long last_delta, last_delta2; }; /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate * of how many bits of entropy this call has added to the pool. The * value "num" is also added to the pool; it should somehow describe * the type of event that just happened. */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; unsigned int bits; /* * If we're in a hard IRQ, add_interrupt_randomness() will be called * sometime after, so mix into the fast pool. */ if (in_hardirq()) { fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); } else { spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(&num, sizeof(num)); spin_unlock_irqrestore(&input_pool.lock, flags); } if (crng_ready()) return; /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ delta = now - READ_ONCE(state->last_time); WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); delta3 = delta2 - READ_ONCE(state->last_delta2); WRITE_ONCE(state->last_delta2, delta2); if (delta < 0) delta = -delta; if (delta2 < 0) delta2 = -delta2; if (delta3 < 0) delta3 = -delta3; if (delta > delta2) delta = delta2; if (delta > delta3) delta = delta3; /* * delta is now minimum absolute delta. Round down by 1 bit * on general principles, and limit entropy estimate to 11 bits. */ bits = min(fls(delta >> 1), 11); /* * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() * will run after this, which uses a different crediting scheme of 1 bit * per every 64 interrupts. In order to let that function do accounting * close to the one in this function, we credit a full 64/64 bit per bit, * and then subtract one to account for the extra one added. */ if (in_hardirq()) this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; else _credit_init_bits(bits); } void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; /* Ignore autorepeat and the like. */ if (value == last_value) return; last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); } EXPORT_SYMBOL_GPL(add_input_randomness); #ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) return; /* First major is 1, so we get >= 0x200 here. */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } EXPORT_SYMBOL_GPL(add_disk_randomness); void __cold rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; /* * If kzalloc returns null, we just won't use that entropy * source. */ state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); if (state) { state->last_time = INITIAL_JIFFIES; disk->random = state; } } #endif struct entropy_timer_state { unsigned long entropy; struct timer_list timer; atomic_t samples; unsigned int samples_per_bit; }; /* * Each time the timer fires, we expect that we got an unpredictable jump in * the cycle counter. Even if the timer is running on another CPU, the timer * activity will be touching the stack of the CPU that is generating entropy. * * Note that we don't re-arm the timer in the timer itself - we are happy to be * scheduled away, since that just makes the load more complex, but we do not * want the timer to keep ticking unless the entropy loop is running. * * So the re-arming always happens in the entropy loop itself. */ static void __cold entropy_timer(struct timer_list *timer) { struct entropy_timer_state *state = container_of(timer, struct entropy_timer_state, timer); unsigned long entropy = random_get_entropy(); mix_pool_bytes(&entropy, sizeof(entropy)); if (atomic_inc_return(&state->samples) % state->samples_per_bit == 0) credit_init_bits(1); } /* * If we have an actual cycle counter, see if we can generate enough entropy * with timing noise. */ static void __cold try_to_generate_entropy(void) { enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 15 }; u8 stack_bytes[sizeof(struct entropy_timer_state) + SMP_CACHE_BYTES - 1]; struct entropy_timer_state *stack = PTR_ALIGN((void *)stack_bytes, SMP_CACHE_BYTES); unsigned int i, num_different = 0; unsigned long last = random_get_entropy(); int cpu = -1; for (i = 0; i < NUM_TRIAL_SAMPLES - 1; ++i) { stack->entropy = random_get_entropy(); if (stack->entropy != last) ++num_different; last = stack->entropy; } stack->samples_per_bit = DIV_ROUND_UP(NUM_TRIAL_SAMPLES, num_different + 1); if (stack->samples_per_bit > MAX_SAMPLES_PER_BIT) return; atomic_set(&stack->samples, 0); timer_setup_on_stack(&stack->timer, entropy_timer, 0); while (!crng_ready() && !signal_pending(current)) { /* * Check !timer_pending() and then ensure that any previous callback has finished * executing by checking try_to_del_timer_sync(), before queueing the next one. */ if (!timer_pending(&stack->timer) && try_to_del_timer_sync(&stack->timer) >= 0) { struct cpumask timer_cpus; unsigned int num_cpus; /* * Preemption must be disabled here, both to read the current CPU number * and to avoid scheduling a timer on a dead CPU. */ preempt_disable(); /* Only schedule callbacks on timer CPUs that are online. */ cpumask_and(&timer_cpus, housekeeping_cpumask(HK_TYPE_TIMER), cpu_online_mask); num_cpus = cpumask_weight(&timer_cpus); /* In very bizarre case of misconfiguration, fallback to all online. */ if (unlikely(num_cpus == 0)) { timer_cpus = *cpu_online_mask; num_cpus = cpumask_weight(&timer_cpus); } /* Basic CPU round-robin, which avoids the current CPU. */ do { cpu = cpumask_next(cpu, &timer_cpus); if (cpu >= nr_cpu_ids) cpu = cpumask_first(&timer_cpus); } while (cpu == smp_processor_id() && num_cpus > 1); /* Expiring the timer at `jiffies` means it's the next tick. */ stack->timer.expires = jiffies; add_timer_on(&stack->timer, cpu); preempt_enable(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); schedule(); stack->entropy = random_get_entropy(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); del_timer_sync(&stack->timer); destroy_timer_on_stack(&stack->timer); } /********************************************************************** * * Userspace reader/writer interfaces. * * getrandom(2) is the primary modern interface into the RNG and should * be used in preference to anything else. * * Reading from /dev/random has the same functionality as calling * getrandom(2) with flags=0. In earlier versions, however, it had * vastly different semantics and should therefore be avoided, to * prevent backwards compatibility issues. * * Reading from /dev/urandom has the same functionality as calling * getrandom(2) with flags=GRND_INSECURE. Because it does not block * waiting for the RNG to be ready, it should not be used. * * Writing to either /dev/random or /dev/urandom adds entropy to * the input pool but does not credit it. * * Polling on /dev/random indicates when the RNG is initialized, on * the read side, and when it wants new entropy, on the write side. * * Both /dev/random and /dev/urandom have the same set of ioctls for * adding entropy, getting the entropy count, zeroing the count, and * reseeding the crng. * **********************************************************************/ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { struct iov_iter iter; int ret; if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; /* * Requesting insecure and blocking randomness at the same time makes * no sense. */ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; if (!crng_ready() && !(flags & GRND_INSECURE)) { if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); if (unlikely(ret)) return ret; } ret = import_ubuf(ITER_DEST, ubuf, len, &iter); if (unlikely(ret)) return ret; return get_random_bytes_user(&iter); } static __poll_t random_poll(struct file *file, poll_table *wait) { poll_wait(file, &crng_init_wait, wait); return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } static ssize_t write_pool_user(struct iov_iter *iter) { u8 block[BLAKE2S_BLOCK_SIZE]; ssize_t ret = 0; size_t copied; if (unlikely(!iov_iter_count(iter))) return 0; for (;;) { copied = copy_from_iter(block, sizeof(block), iter); ret += copied; mix_pool_bytes(block, copied); if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); return ret ? ret : -EFAULT; } static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { return write_pool_user(iter); } static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { static int maxwarn = 10; /* * Opportunistically attempt to initialize the RNG on platforms that * have fast cycle counters, but don't (for now) require it to succeed. */ if (!crng_ready()) try_to_generate_entropy(); if (!crng_ready()) { if (!ratelimit_disable && maxwarn <= 0) ++urandom_warning.missed; else if (ratelimit_disable || __ratelimit(&urandom_warning)) { --maxwarn; pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", current->comm, iov_iter_count(iter)); } } return get_random_bytes_user(iter); } static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { int ret; if (!crng_ready() && ((kiocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) || (kiocb->ki_filp->f_flags & O_NONBLOCK))) return -EAGAIN; ret = wait_for_random_bytes(); if (ret != 0) return ret; return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int __user *p = (int __user *)arg; int ent_count; switch (cmd) { case RNDGETENTCNT: /* Inherently racy, no point locking. */ if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; if (ent_count < 0) return -EINVAL; credit_init_bits(ent_count); return 0; case RNDADDENTROPY: { struct iov_iter iter; ssize_t ret; int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; if (get_user(len, p++)) return -EFAULT; ret = import_ubuf(ITER_SOURCE, p, len, &iter); if (unlikely(ret)) return ret; ret = write_pool_user(&iter); if (unlikely(ret < 0)) return ret; /* Since we're crediting, enforce that it was all written into the pool. */ if (unlikely(ret != len)) return -EFAULT; credit_init_bits(ent_count); return 0; } case RNDZAPENTCNT: case RNDCLEARPOOL: /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!crng_ready()) return -ENODATA; crng_reseed(NULL); return 0; default: return -EINVAL; } } static int random_fasync(int fd, struct file *filp, int on) { return fasync_helper(fd, filp, on, &fasync); } const struct file_operations random_fops = { .read_iter = random_read_iter, .write_iter = random_write_iter, .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { .read_iter = urandom_read_iter, .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /******************************************************************** * * Sysctl interface. * * These are partly unused legacy knobs with dummy values to not break * userspace and partly still useful things. They are usually accessible * in /proc/sys/kernel/random/ and are as follows: * * - boot_id - a UUID representing the current boot. * * - uuid - a random UUID, different each time the file is read. * * - poolsize - the number of bits of entropy that the input pool can * hold, tied to the POOL_BITS constant. * * - entropy_avail - the number of bits of entropy currently in the * input pool. Always <= poolsize. * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting * more entropy, tied to the POOL_READY_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. * It is writable to avoid breaking old userspaces, but writing * to it does not change any behavior of the RNG. * ********************************************************************/ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. */ static int proc_do_uuid(const struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { u8 tmp_uuid[UUID_SIZE], *uuid; char uuid_string[UUID_STRING_LEN + 1]; struct ctl_table fake_table = { .data = uuid_string, .maxlen = UUID_STRING_LEN }; if (write) return -EPERM; uuid = table->data; if (!uuid) { uuid = tmp_uuid; generate_random_uuid(uuid); } else { static DEFINE_SPINLOCK(bootid_spinlock); spin_lock(&bootid_spinlock); if (!uuid[8]) generate_random_uuid(uuid); spin_unlock(&bootid_spinlock); } snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); return proc_dostring(&fake_table, 0, buf, lenp, ppos); } /* The same as proc_dointvec, but writes don't change anything. */ static int proc_do_rointvec(const struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } static struct ctl_table random_table[] = { { .procname = "poolsize", .data = &sysctl_poolsize, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "entropy_avail", .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "urandom_min_reseed_secs", .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "boot_id", .data = &sysctl_bootid, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", .mode = 0444, .proc_handler = proc_do_uuid, }, }; /* * random_init() is called before sysctl_init(), * so we cannot call register_sysctl_init() in random_init() */ static int __init random_sysctls_init(void) { register_sysctl_init("kernel/random", random_table); return 0; } device_initcall(random_sysctls_init); #endif
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 // SPDX-License-Identifier: GPL-2.0+ /* Framework for finding and configuring PHYs. * Also contains generic PHY driver * * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/acpi.h> #include <linux/bitmap.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mdio.h> #include <linux/mii.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/of.h> #include <linux/netdevice.h> #include <linux/phy.h> #include <linux/phylib_stubs.h> #include <linux/phy_led_triggers.h> #include <linux/pse-pd/pse.h> #include <linux/property.h> #include <linux/rtnetlink.h> #include <linux/sfp.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/unistd.h> MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_basic_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_basic_t1_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_gbit_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_gbit_fibre_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_fec_features); const int phy_basic_ports_array[3] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, }; EXPORT_SYMBOL_GPL(phy_basic_ports_array); const int phy_fibre_port_array[1] = { ETHTOOL_LINK_MODE_FIBRE_BIT, }; EXPORT_SYMBOL_GPL(phy_fibre_port_array); const int phy_all_ports_features_array[7] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT, ETHTOOL_LINK_MODE_AUI_BIT, ETHTOOL_LINK_MODE_BNC_BIT, ETHTOOL_LINK_MODE_Backplane_BIT, }; EXPORT_SYMBOL_GPL(phy_all_ports_features_array); const int phy_10_100_features_array[4] = { ETHTOOL_LINK_MODE_10baseT_Half_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT, }; EXPORT_SYMBOL_GPL(phy_10_100_features_array); const int phy_basic_t1_features_array[3] = { ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, ETHTOOL_LINK_MODE_100baseT1_Full_BIT, }; EXPORT_SYMBOL_GPL(phy_basic_t1_features_array); const int phy_basic_t1s_p2mp_features_array[2] = { ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, }; EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features_array); const int phy_gbit_features_array[2] = { ETHTOOL_LINK_MODE_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, }; EXPORT_SYMBOL_GPL(phy_gbit_features_array); const int phy_10gbit_features_array[1] = { ETHTOOL_LINK_MODE_10000baseT_Full_BIT, }; EXPORT_SYMBOL_GPL(phy_10gbit_features_array); static const int phy_10gbit_fec_features_array[1] = { ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_full_features); static const int phy_10gbit_full_features_array[] = { ETHTOOL_LINK_MODE_10baseT_Full_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT, }; static const int phy_eee_cap1_features_array[] = { ETHTOOL_LINK_MODE_100baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_eee_cap1_features); static const int phy_eee_cap2_features_array[] = { ETHTOOL_LINK_MODE_2500baseT_Full_BIT, ETHTOOL_LINK_MODE_5000baseT_Full_BIT, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_eee_cap2_features); static void features_init(void) { /* 10/100 half/full*/ linkmode_set_bit_array(phy_basic_ports_array, ARRAY_SIZE(phy_basic_ports_array), phy_basic_features); linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), phy_basic_features); /* 100 full, TP */ linkmode_set_bit_array(phy_basic_t1_features_array, ARRAY_SIZE(phy_basic_t1_features_array), phy_basic_t1_features); /* 10 half, P2MP, TP */ linkmode_set_bit_array(phy_basic_t1s_p2mp_features_array, ARRAY_SIZE(phy_basic_t1s_p2mp_features_array), phy_basic_t1s_p2mp_features); /* 10/100 half/full + 1000 half/full */ linkmode_set_bit_array(phy_basic_ports_array, ARRAY_SIZE(phy_basic_ports_array), phy_gbit_features); linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), phy_gbit_features); linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_gbit_features); /* 10/100 half/full + 1000 half/full + fibre*/ linkmode_set_bit_array(phy_basic_ports_array, ARRAY_SIZE(phy_basic_ports_array), phy_gbit_fibre_features); linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), phy_gbit_fibre_features); linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_gbit_fibre_features); linkmode_set_bit_array(phy_fibre_port_array, ARRAY_SIZE(phy_fibre_port_array), phy_gbit_fibre_features); /* 10/100 half/full + 1000 half/full + TP/MII/FIBRE/AUI/BNC/Backplane*/ linkmode_set_bit_array(phy_all_ports_features_array, ARRAY_SIZE(phy_all_ports_features_array), phy_gbit_all_ports_features); linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), phy_gbit_all_ports_features); linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_gbit_all_ports_features); /* 10/100 half/full + 1000 half/full + 10G full*/ linkmode_set_bit_array(phy_all_ports_features_array, ARRAY_SIZE(phy_all_ports_features_array), phy_10gbit_features); linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), phy_10gbit_features); linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_10gbit_features); linkmode_set_bit_array(phy_10gbit_features_array, ARRAY_SIZE(phy_10gbit_features_array), phy_10gbit_features); /* 10/100/1000/10G full */ linkmode_set_bit_array(phy_all_ports_features_array, ARRAY_SIZE(phy_all_ports_features_array), phy_10gbit_full_features); linkmode_set_bit_array(phy_10gbit_full_features_array, ARRAY_SIZE(phy_10gbit_full_features_array), phy_10gbit_full_features); /* 10G FEC only */ linkmode_set_bit_array(phy_10gbit_fec_features_array, ARRAY_SIZE(phy_10gbit_fec_features_array), phy_10gbit_fec_features); linkmode_set_bit_array(phy_eee_cap1_features_array, ARRAY_SIZE(phy_eee_cap1_features_array), phy_eee_cap1_features); linkmode_set_bit_array(phy_eee_cap2_features_array, ARRAY_SIZE(phy_eee_cap2_features_array), phy_eee_cap2_features); } void phy_device_free(struct phy_device *phydev) { put_device(&phydev->mdio.dev); } EXPORT_SYMBOL(phy_device_free); static void phy_mdio_device_free(struct mdio_device *mdiodev) { struct phy_device *phydev; phydev = container_of(mdiodev, struct phy_device, mdio); phy_device_free(phydev); } static void phy_device_release(struct device *dev) { fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } static void phy_mdio_device_remove(struct mdio_device *mdiodev) { struct phy_device *phydev; phydev = container_of(mdiodev, struct phy_device, mdio); phy_device_remove(phydev); } static struct phy_driver genphy_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); struct net_device *netdev = phydev->attached_dev; if (!drv || !phydrv->suspend) return false; /* PHY not attached? May suspend if the PHY has not already been * suspended as part of a prior call to phy_disconnect() -> * phy_detach() -> phy_suspend() because the parent netdev might be the * MDIO bus driver and clock gated at this point. */ if (!netdev) goto out; if (netdev->ethtool->wol_enabled) return false; /* As long as not all affected network drivers support the * wol_enabled flag, let's check for hints that WoL is enabled. * Don't suspend PHY if the attached netdev parent may wake up. * The parent may point to a PCI device, as in tg3 driver. */ if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) return false; /* Also don't suspend PHY if the netdev itself may wakeup. This * is the case for devices w/o underlaying pwr. mgmt. aware bus, * e.g. SoC devices. */ if (device_may_wakeup(&netdev->dev)) return false; out: return !phydev->suspended; } static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); if (phydev->mac_managed_pm) return 0; /* Wakeup interrupts may occur during the system sleep transition when * the PHY is inaccessible. Set flag to postpone handling until the PHY * has resumed. Wait for concurrent interrupt handler to complete. */ if (phy_interrupt_is_valid(phydev)) { phydev->irq_suspended = 1; synchronize_irq(phydev->irq); } /* We must stop the state machine manually, otherwise it stops out of * control, possibly with the phydev->lock held. Upon resume, netdev * may call phy routines that try to grab the same lock, and that may * lead to a deadlock. */ if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); if (!mdio_bus_phy_may_suspend(phydev)) return 0; phydev->suspended_by_mdio_bus = 1; return phy_suspend(phydev); } static __maybe_unused int mdio_bus_phy_resume(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); int ret; if (phydev->mac_managed_pm) return 0; if (!phydev->suspended_by_mdio_bus) goto no_resume; phydev->suspended_by_mdio_bus = 0; /* If we managed to get here with the PHY state machine in a state * neither PHY_HALTED, PHY_READY nor PHY_UP, this is an indication * that something went wrong and we should most likely be using * MAC managed PM, but we are not. */ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY && phydev->state != PHY_UP); ret = phy_init_hw(phydev); if (ret < 0) return ret; ret = phy_resume(phydev); if (ret < 0) return ret; no_resume: if (phy_interrupt_is_valid(phydev)) { phydev->irq_suspended = 0; synchronize_irq(phydev->irq); /* Rerun interrupts which were postponed by phy_interrupt() * because they occurred during the system sleep transition. */ if (phydev->irq_rerun) { phydev->irq_rerun = 0; enable_irq(phydev->irq); irq_wake_thread(phydev->irq, phydev); } } if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); return 0; } static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, mdio_bus_phy_resume); /** * phy_register_fixup - creates a new phy_fixup and adds it to the list * @bus_id: A string which matches phydev->mdio.dev.bus_id (or PHY_ANY_ID) * @phy_uid: Used to match against phydev->phy_id (the UID of the PHY) * It can also be PHY_ANY_UID * @phy_uid_mask: Applied to phydev->phy_id and fixup->phy_uid before * comparison * @run: The actual code to be run when a matching PHY is found */ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)) { struct phy_fixup *fixup = kzalloc(sizeof(*fixup), GFP_KERNEL); if (!fixup) return -ENOMEM; strscpy(fixup->bus_id, bus_id, sizeof(fixup->bus_id)); fixup->phy_uid = phy_uid; fixup->phy_uid_mask = phy_uid_mask; fixup->run = run; mutex_lock(&phy_fixup_lock); list_add_tail(&fixup->list, &phy_fixup_list); mutex_unlock(&phy_fixup_lock); return 0; } EXPORT_SYMBOL(phy_register_fixup); /* Registers a fixup to be run on any PHY with the UID in phy_uid */ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)) { return phy_register_fixup(PHY_ANY_ID, phy_uid, phy_uid_mask, run); } EXPORT_SYMBOL(phy_register_fixup_for_uid); /* Registers a fixup to be run on the PHY with id string bus_id */ int phy_register_fixup_for_id(const char *bus_id, int (*run)(struct phy_device *)) { return phy_register_fixup(bus_id, PHY_ANY_UID, 0xffffffff, run); } EXPORT_SYMBOL(phy_register_fixup_for_id); /** * phy_unregister_fixup - remove a phy_fixup from the list * @bus_id: A string matches fixup->bus_id (or PHY_ANY_ID) in phy_fixup_list * @phy_uid: A phy id matches fixup->phy_id (or PHY_ANY_UID) in phy_fixup_list * @phy_uid_mask: Applied to phy_uid and fixup->phy_uid before comparison */ int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask) { struct list_head *pos, *n; struct phy_fixup *fixup; int ret; ret = -ENODEV; mutex_lock(&phy_fixup_lock); list_for_each_safe(pos, n, &phy_fixup_list) { fixup = list_entry(pos, struct phy_fixup, list); if ((!strcmp(fixup->bus_id, bus_id)) && phy_id_compare(fixup->phy_uid, phy_uid, phy_uid_mask)) { list_del(&fixup->list); kfree(fixup); ret = 0; break; } } mutex_unlock(&phy_fixup_lock); return ret; } EXPORT_SYMBOL(phy_unregister_fixup); /* Unregisters a fixup of any PHY with the UID in phy_uid */ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask) { return phy_unregister_fixup(PHY_ANY_ID, phy_uid, phy_uid_mask); } EXPORT_SYMBOL(phy_unregister_fixup_for_uid); /* Unregisters a fixup of the PHY with id string bus_id */ int phy_unregister_fixup_for_id(const char *bus_id) { return phy_unregister_fixup(bus_id, PHY_ANY_UID, 0xffffffff); } EXPORT_SYMBOL(phy_unregister_fixup_for_id); /* Returns 1 if fixup matches phydev in bus_id and phy_uid. * Fixups can be set to match any in one or more fields. */ static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup) { if (strcmp(fixup->bus_id, phydev_name(phydev)) != 0) if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0) return 0; if (!phy_id_compare(phydev->phy_id, fixup->phy_uid, fixup->phy_uid_mask)) if (fixup->phy_uid != PHY_ANY_UID) return 0; return 1; } /* Runs any matching fixups for this phydev */ static int phy_scan_fixups(struct phy_device *phydev) { struct phy_fixup *fixup; mutex_lock(&phy_fixup_lock); list_for_each_entry(fixup, &phy_fixup_list, list) { if (phy_needs_fixup(phydev, fixup)) { int err = fixup->run(phydev); if (err < 0) { mutex_unlock(&phy_fixup_lock); return err; } phydev->has_fixups = true; } } mutex_unlock(&phy_fixup_lock); return 0; } static int phy_bus_match(struct device *dev, const struct device_driver *drv) { struct phy_device *phydev = to_phy_device(dev); const struct phy_driver *phydrv = to_phy_driver(drv); const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids); int i; if (!(phydrv->mdiodrv.flags & MDIO_DEVICE_IS_PHY)) return 0; if (phydrv->match_phy_device) return phydrv->match_phy_device(phydev); if (phydev->is_c45) { for (i = 1; i < num_ids; i++) { if (phydev->c45_ids.device_ids[i] == 0xffffffff) continue; if (phy_id_compare(phydev->c45_ids.device_ids[i], phydrv->phy_id, phydrv->phy_id_mask)) return 1; } return 0; } else { return phy_id_compare(phydev->phy_id, phydrv->phy_id, phydrv->phy_id_mask); } } static ssize_t phy_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct phy_device *phydev = to_phy_device(dev); return sysfs_emit(buf, "0x%.8lx\n", (unsigned long)phydev->phy_id); } static DEVICE_ATTR_RO(phy_id); static ssize_t phy_interface_show(struct device *dev, struct device_attribute *attr, char *buf) { struct phy_device *phydev = to_phy_device(dev); const char *mode = NULL; if (phy_is_internal(phydev)) mode = "internal"; else mode = phy_modes(phydev->interface); return sysfs_emit(buf, "%s\n", mode); } static DEVICE_ATTR_RO(phy_interface); static ssize_t phy_has_fixups_show(struct device *dev, struct device_attribute *attr, char *buf) { struct phy_device *phydev = to_phy_device(dev); return sysfs_emit(buf, "%d\n", phydev->has_fixups); } static DEVICE_ATTR_RO(phy_has_fixups); static ssize_t phy_dev_flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct phy_device *phydev = to_phy_device(dev); return sysfs_emit(buf, "0x%08x\n", phydev->dev_flags); } static DEVICE_ATTR_RO(phy_dev_flags); static struct attribute *phy_dev_attrs[] = { &dev_attr_phy_id.attr, &dev_attr_phy_interface.attr, &dev_attr_phy_has_fixups.attr, &dev_attr_phy_dev_flags.attr, NULL, }; ATTRIBUTE_GROUPS(phy_dev); static const struct device_type mdio_bus_phy_type = { .name = "PHY", .groups = phy_dev_groups, .release = phy_device_release, .pm = pm_ptr(&mdio_bus_phy_pm_ops), }; static int phy_request_driver_module(struct phy_device *dev, u32 phy_id) { int ret; ret = request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id)); /* We only check for failures in executing the usermode binary, * not whether a PHY driver module exists for the PHY ID. * Accept -ENOENT because this may occur in case no initramfs exists, * then modprobe isn't available. */ if (IS_ENABLED(CONFIG_MODULES) && ret < 0 && ret != -ENOENT) { phydev_err(dev, "error %d loading PHY driver module for ID 0x%08lx\n", ret, (unsigned long)phy_id); return ret; } return 0; } struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids) { struct phy_device *dev; struct mdio_device *mdiodev; int ret = 0; /* We allocate the device, and initialize the default values */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); mdiodev = &dev->mdio; mdiodev->dev.parent = &bus->dev; mdiodev->dev.bus = &mdio_bus_type; mdiodev->dev.type = &mdio_bus_phy_type; mdiodev->bus = bus; mdiodev->bus_match = phy_bus_match; mdiodev->addr = addr; mdiodev->flags = MDIO_DEVICE_FLAG_PHY; mdiodev->device_free = phy_mdio_device_free; mdiodev->device_remove = phy_mdio_device_remove; mdiodev->reset_state = -1; dev->speed = SPEED_UNKNOWN; dev->duplex = DUPLEX_UNKNOWN; dev->pause = 0; dev->asym_pause = 0; dev->link = 0; dev->port = PORT_TP; dev->interface = PHY_INTERFACE_MODE_GMII; dev->autoneg = AUTONEG_ENABLE; dev->pma_extable = -ENODATA; dev->is_c45 = is_c45; dev->phy_id = phy_id; if (c45_ids) dev->c45_ids = *c45_ids; dev->irq = bus->irq[addr]; dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); device_initialize(&mdiodev->dev); dev->state = PHY_DOWN; INIT_LIST_HEAD(&dev->leds); mutex_init(&dev->lock); INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); /* Request the appropriate module unconditionally; don't * bother trying to do so only if it isn't already loaded, * because that gets complicated. A hotplug event would have * done an unconditional modprobe anyway. * We don't do normal hotplug because it won't work for MDIO * -- because it relies on the device staying around for long * enough for the driver to get loaded. With MDIO, the NIC * driver will get bored and give up as soon as it finds that * there's no driver _already_ loaded. */ if (is_c45 && c45_ids) { const int num_ids = ARRAY_SIZE(c45_ids->device_ids); int i; for (i = 1; i < num_ids; i++) { if (c45_ids->device_ids[i] == 0xffffffff) continue; ret = phy_request_driver_module(dev, c45_ids->device_ids[i]); if (ret) break; } } else { ret = phy_request_driver_module(dev, phy_id); } if (ret) { put_device(&mdiodev->dev); dev = ERR_PTR(ret); } return dev; } EXPORT_SYMBOL(phy_device_create); /* phy_c45_probe_present - checks to see if a MMD is present in the package * @bus: the target MII bus * @prtad: PHY package address on the MII bus * @devad: PHY device (MMD) address * * Read the MDIO_STAT2 register, and check whether a device is responding * at this address. * * Returns: negative error number on bus access error, zero if no device * is responding, or positive if a device is present. */ static int phy_c45_probe_present(struct mii_bus *bus, int prtad, int devad) { int stat2; stat2 = mdiobus_c45_read(bus, prtad, devad, MDIO_STAT2); if (stat2 < 0) return stat2; return (stat2 & MDIO_STAT2_DEVPRST) == MDIO_STAT2_DEVPRST_VAL; } /* get_phy_c45_devs_in_pkg - reads a MMD's devices in package registers. * @bus: the target MII bus * @addr: PHY address on the MII bus * @dev_addr: MMD address in the PHY. * @devices_in_package: where to store the devices in package information. * * Description: reads devices in package registers of a MMD at @dev_addr * from PHY at @addr on @bus. * * Returns: 0 on success, -EIO on failure. */ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr, u32 *devices_in_package) { int phy_reg; phy_reg = mdiobus_c45_read(bus, addr, dev_addr, MDIO_DEVS2); if (phy_reg < 0) return -EIO; *devices_in_package = phy_reg << 16; phy_reg = mdiobus_c45_read(bus, addr, dev_addr, MDIO_DEVS1); if (phy_reg < 0) return -EIO; *devices_in_package |= phy_reg; return 0; } /** * get_phy_c45_ids - reads the specified addr for its 802.3-c45 IDs. * @bus: the target MII bus * @addr: PHY address on the MII bus * @c45_ids: where to store the c45 ID information. * * Read the PHY "devices in package". If this appears to be valid, read * the PHY identifiers for each device. Return the "devices in package" * and identifiers in @c45_ids. * * Returns zero on success, %-EIO on bus access error, or %-ENODEV if * the "devices in package" is invalid or no device responds. */ static int get_phy_c45_ids(struct mii_bus *bus, int addr, struct phy_c45_device_ids *c45_ids) { const int num_ids = ARRAY_SIZE(c45_ids->device_ids); u32 devs_in_pkg = 0; int i, ret, phy_reg; /* Find first non-zero Devices In package. Device zero is reserved * for 802.3 c45 complied PHYs, so don't probe it at first. */ for (i = 1; i < MDIO_MMD_NUM && (devs_in_pkg == 0 || (devs_in_pkg & 0x1fffffff) == 0x1fffffff); i++) { if (i == MDIO_MMD_VEND1 || i == MDIO_MMD_VEND2) { /* Check that there is a device present at this * address before reading the devices-in-package * register to avoid reading garbage from the PHY. * Some PHYs (88x3310) vendor space is not IEEE802.3 * compliant. */ ret = phy_c45_probe_present(bus, addr, i); if (ret < 0) /* returning -ENODEV doesn't stop bus * scanning */ return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; if (!ret) continue; } phy_reg = get_phy_c45_devs_in_pkg(bus, addr, i, &devs_in_pkg); if (phy_reg < 0) return -EIO; } if ((devs_in_pkg & 0x1fffffff) == 0x1fffffff) { /* If mostly Fs, there is no device there, then let's probe * MMD 0, as some 10G PHYs have zero Devices In package, * e.g. Cortina CS4315/CS4340 PHY. */ phy_reg = get_phy_c45_devs_in_pkg(bus, addr, 0, &devs_in_pkg); if (phy_reg < 0) return -EIO; /* no device there, let's get out of here */ if ((devs_in_pkg & 0x1fffffff) == 0x1fffffff) return -ENODEV; } /* Now probe Device Identifiers for each device present. */ for (i = 1; i < num_ids; i++) { if (!(devs_in_pkg & (1 << i))) continue; if (i == MDIO_MMD_VEND1 || i == MDIO_MMD_VEND2) { /* Probe the "Device Present" bits for the vendor MMDs * to ignore these if they do not contain IEEE 802.3 * registers. */ ret = phy_c45_probe_present(bus, addr, i); if (ret < 0) return ret; if (!ret) continue; } phy_reg = mdiobus_c45_read(bus, addr, i, MII_PHYSID1); if (phy_reg < 0) return -EIO; c45_ids->device_ids[i] = phy_reg << 16; phy_reg = mdiobus_c45_read(bus, addr, i, MII_PHYSID2); if (phy_reg < 0) return -EIO; c45_ids->device_ids[i] |= phy_reg; } c45_ids->devices_in_package = devs_in_pkg; /* Bit 0 doesn't represent a device, it indicates c22 regs presence */ c45_ids->mmds_present = devs_in_pkg & ~BIT(0); return 0; } /** * get_phy_c22_id - reads the specified addr for its clause 22 ID. * @bus: the target MII bus * @addr: PHY address on the MII bus * @phy_id: where to store the ID retrieved. * * Read the 802.3 clause 22 PHY ID from the PHY at @addr on the @bus, * placing it in @phy_id. Return zero on successful read and the ID is * valid, %-EIO on bus access error, or %-ENODEV if no device responds * or invalid ID. */ static int get_phy_c22_id(struct mii_bus *bus, int addr, u32 *phy_id) { int phy_reg; /* Grab the bits from PHYIR1, and put them in the upper half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); if (phy_reg < 0) { /* returning -ENODEV doesn't stop bus scanning */ return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; } *phy_id = phy_reg << 16; /* Grab the bits from PHYIR2, and put them in the lower half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); if (phy_reg < 0) { /* returning -ENODEV doesn't stop bus scanning */ return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; } *phy_id |= phy_reg; /* If the phy_id is mostly Fs, there is no device there */ if ((*phy_id & 0x1fffffff) == 0x1fffffff) return -ENODEV; return 0; } /* Extract the phy ID from the compatible string of the form * ethernet-phy-idAAAA.BBBB. */ int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id) { unsigned int upper, lower; const char *cp; int ret; ret = fwnode_property_read_string(fwnode, "compatible", &cp); if (ret) return ret; if (sscanf(cp, "ethernet-phy-id%4x.%4x", &upper, &lower) != 2) return -EINVAL; *phy_id = ((upper & GENMASK(15, 0)) << 16) | (lower & GENMASK(15, 0)); return 0; } EXPORT_SYMBOL(fwnode_get_phy_id); /** * get_phy_device - reads the specified PHY device and returns its @phy_device * struct * @bus: the target MII bus * @addr: PHY address on the MII bus * @is_c45: If true the PHY uses the 802.3 clause 45 protocol * * Probe for a PHY at @addr on @bus. * * When probing for a clause 22 PHY, then read the ID registers. If we find * a valid ID, allocate and return a &struct phy_device. * * When probing for a clause 45 PHY, read the "devices in package" registers. * If the "devices in package" appears valid, read the ID registers for each * MMD, allocate and return a &struct phy_device. * * Returns an allocated &struct phy_device on success, %-ENODEV if there is * no PHY present, or %-EIO on bus access error. */ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) { struct phy_c45_device_ids c45_ids; u32 phy_id = 0; int r; c45_ids.devices_in_package = 0; c45_ids.mmds_present = 0; memset(c45_ids.device_ids, 0xff, sizeof(c45_ids.device_ids)); if (is_c45) r = get_phy_c45_ids(bus, addr, &c45_ids); else r = get_phy_c22_id(bus, addr, &phy_id); if (r) return ERR_PTR(r); /* PHY device such as the Marvell Alaska 88E2110 will return a PHY ID * of 0 when probed using get_phy_c22_id() with no error. Proceed to * probe with C45 to see if we're able to get a valid PHY ID in the C45 * space, if successful, create the C45 PHY device. */ if (!is_c45 && phy_id == 0 && bus->read_c45) { r = get_phy_c45_ids(bus, addr, &c45_ids); if (!r) return phy_device_create(bus, addr, phy_id, true, &c45_ids); } return phy_device_create(bus, addr, phy_id, is_c45, &c45_ids); } EXPORT_SYMBOL(get_phy_device); /** * phy_device_register - Register the phy device on the MDIO bus * @phydev: phy_device structure to be added to the MDIO bus */ int phy_device_register(struct phy_device *phydev) { int err; err = mdiobus_register_device(&phydev->mdio); if (err) return err; /* Deassert the reset signal */ phy_device_reset(phydev, 0); /* Run all of the fixups for this PHY */ err = phy_scan_fixups(phydev); if (err) { phydev_err(phydev, "failed to initialize\n"); goto out; } err = device_add(&phydev->mdio.dev); if (err) { phydev_err(phydev, "failed to add\n"); goto out; } return 0; out: /* Assert the reset signal */ phy_device_reset(phydev, 1); mdiobus_unregister_device(&phydev->mdio); return err; } EXPORT_SYMBOL(phy_device_register); /** * phy_device_remove - Remove a previously registered phy device from the MDIO bus * @phydev: phy_device structure to remove * * This doesn't free the phy_device itself, it merely reverses the effects * of phy_device_register(). Use phy_device_free() to free the device * after calling this function. */ void phy_device_remove(struct phy_device *phydev) { unregister_mii_timestamper(phydev->mii_ts); pse_control_put(phydev->psec); device_del(&phydev->mdio.dev); /* Assert the reset signal */ phy_device_reset(phydev, 1); mdiobus_unregister_device(&phydev->mdio); } EXPORT_SYMBOL(phy_device_remove); /** * phy_get_c45_ids - Read 802.3-c45 IDs for phy device. * @phydev: phy_device structure to read 802.3-c45 IDs * * Returns zero on success, %-EIO on bus access error, or %-ENODEV if * the "devices in package" is invalid. */ int phy_get_c45_ids(struct phy_device *phydev) { return get_phy_c45_ids(phydev->mdio.bus, phydev->mdio.addr, &phydev->c45_ids); } EXPORT_SYMBOL(phy_get_c45_ids); /** * phy_find_first - finds the first PHY device on the bus * @bus: the target MII bus */ struct phy_device *phy_find_first(struct mii_bus *bus) { struct phy_device *phydev; int addr; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { phydev = mdiobus_get_phy(bus, addr); if (phydev) return phydev; } return NULL; } EXPORT_SYMBOL(phy_find_first); static void phy_link_change(struct phy_device *phydev, bool up) { struct net_device *netdev = phydev->attached_dev; if (up) netif_carrier_on(netdev); else netif_carrier_off(netdev); phydev->adjust_link(netdev); if (phydev->mii_ts && phydev->mii_ts->link_state) phydev->mii_ts->link_state(phydev->mii_ts, phydev); } /** * phy_prepare_link - prepares the PHY layer to monitor link status * @phydev: target phy_device struct * @handler: callback function for link status change notifications * * Description: Tells the PHY infrastructure to handle the * gory details on monitoring link status (whether through * polling or an interrupt), and to call back to the * connected device driver when the link status changes. * If you want to monitor your own link state, don't call * this function. */ static void phy_prepare_link(struct phy_device *phydev, void (*handler)(struct net_device *)) { phydev->adjust_link = handler; } /** * phy_connect_direct - connect an ethernet device to a specific phy_device * @dev: the network device to connect * @phydev: the pointer to the phy device * @handler: callback function for state change notifications * @interface: PHY device's interface */ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, void (*handler)(struct net_device *), phy_interface_t interface) { int rc; if (!dev) return -EINVAL; rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); if (rc) return rc; phy_prepare_link(phydev, handler); if (phy_interrupt_is_valid(phydev)) phy_request_interrupt(phydev); return 0; } EXPORT_SYMBOL(phy_connect_direct); /** * phy_connect - connect an ethernet device to a PHY device * @dev: the network device to connect * @bus_id: the id string of the PHY device to connect * @handler: callback function for state change notifications * @interface: PHY device's interface * * Description: Convenience function for connecting ethernet * devices to PHY devices. The default behavior is for * the PHY infrastructure to handle everything, and only notify * the connected driver when the link status changes. If you * don't want, or can't use the provided functionality, you may * choose to call only the subset of functions which provide * the desired functionality. */ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id, void (*handler)(struct net_device *), phy_interface_t interface) { struct phy_device *phydev; struct device *d; int rc; /* Search the list of PHY devices on the mdio bus for the * PHY with the requested name */ d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id); if (!d) { pr_err("PHY %s not found\n", bus_id); return ERR_PTR(-ENODEV); } phydev = to_phy_device(d); rc = phy_connect_direct(dev, phydev, handler, interface); put_device(d); if (rc) return ERR_PTR(rc); return phydev; } EXPORT_SYMBOL(phy_connect); /** * phy_disconnect - disable interrupts, stop state machine, and detach a PHY * device * @phydev: target phy_device struct */ void phy_disconnect(struct phy_device *phydev) { if (phy_is_started(phydev)) phy_stop(phydev); if (phy_interrupt_is_valid(phydev)) phy_free_interrupt(phydev); phydev->adjust_link = NULL; phy_detach(phydev); } EXPORT_SYMBOL(phy_disconnect); /** * phy_poll_reset - Safely wait until a PHY reset has properly completed * @phydev: The PHY device to poll * * Description: According to IEEE 802.3, Section 2, Subsection 22.2.4.1.1, as * published in 2008, a PHY reset may take up to 0.5 seconds. The MII BMCR * register must be polled until the BMCR_RESET bit clears. * * Furthermore, any attempts to write to PHY registers may have no effect * or even generate MDIO bus errors until this is complete. * * Some PHYs (such as the Marvell 88E1111) don't entirely conform to the * standard and do not fully reset after the BMCR_RESET bit is set, and may * even *REQUIRE* a soft-reset to properly restart autonegotiation. In an * effort to support such broken PHYs, this function is separate from the * standard phy_init_hw() which will zero all the other bits in the BMCR * and reapply all driver-specific and board-specific fixups. */ static int phy_poll_reset(struct phy_device *phydev) { /* Poll until the reset bit clears (50ms per retry == 0.6 sec) */ int ret, val; ret = phy_read_poll_timeout(phydev, MII_BMCR, val, !(val & BMCR_RESET), 50000, 600000, true); if (ret) return ret; /* Some chips (smsc911x) may still need up to another 1ms after the * BMCR_RESET bit is cleared before they are usable. */ msleep(1); return 0; } int phy_init_hw(struct phy_device *phydev) { int ret = 0; /* Deassert the reset signal */ phy_device_reset(phydev, 0); if (!phydev->drv) return 0; if (phydev->drv->soft_reset) { ret = phydev->drv->soft_reset(phydev); if (ret < 0) return ret; /* see comment in genphy_soft_reset for an explanation */ phydev->suspended = 0; } ret = phy_scan_fixups(phydev); if (ret < 0) return ret; phy_interface_zero(phydev->possible_interfaces); if (phydev->drv->config_init) { ret = phydev->drv->config_init(phydev); if (ret < 0) return ret; } if (phydev->drv->config_intr) { ret = phydev->drv->config_intr(phydev); if (ret < 0) return ret; } return 0; } EXPORT_SYMBOL(phy_init_hw); void phy_attached_info(struct phy_device *phydev) { phy_attached_print(phydev, NULL); } EXPORT_SYMBOL(phy_attached_info); #define ATTACHED_FMT "attached PHY driver %s(mii_bus:phy_addr=%s, irq=%s)" char *phy_attached_info_irq(struct phy_device *phydev) { char *irq_str; char irq_num[8]; switch(phydev->irq) { case PHY_POLL: irq_str = "POLL"; break; case PHY_MAC_INTERRUPT: irq_str = "MAC"; break; default: snprintf(irq_num, sizeof(irq_num), "%d", phydev->irq); irq_str = irq_num; break; } return kasprintf(GFP_KERNEL, "%s", irq_str); } EXPORT_SYMBOL(phy_attached_info_irq); void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) { const char *unbound = phydev->drv ? "" : "[unbound] "; char *irq_str = phy_attached_info_irq(phydev); if (!fmt) { phydev_info(phydev, ATTACHED_FMT "\n", unbound, phydev_name(phydev), irq_str); } else { va_list ap; phydev_info(phydev, ATTACHED_FMT, unbound, phydev_name(phydev), irq_str); va_start(ap, fmt); vprintk(fmt, ap); va_end(ap); } kfree(irq_str); } EXPORT_SYMBOL(phy_attached_print); static void phy_sysfs_create_links(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; int err; if (!dev) return; err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj, "attached_dev"); if (err) return; err = sysfs_create_link_nowarn(&dev->dev.kobj, &phydev->mdio.dev.kobj, "phydev"); if (err) { dev_err(&dev->dev, "could not add device link to %s err %d\n", kobject_name(&phydev->mdio.dev.kobj), err); /* non-fatal - some net drivers can use one netdevice * with more then one phy */ } phydev->sysfs_links = true; } static ssize_t phy_standalone_show(struct device *dev, struct device_attribute *attr, char *buf) { struct phy_device *phydev = to_phy_device(dev); return sysfs_emit(buf, "%d\n", !phydev->attached_dev); } static DEVICE_ATTR_RO(phy_standalone); /** * phy_sfp_attach - attach the SFP bus to the PHY upstream network device * @upstream: pointer to the phy device * @bus: sfp bus representing cage being attached * * This is used to fill in the sfp_upstream_ops .attach member. */ void phy_sfp_attach(void *upstream, struct sfp_bus *bus) { struct phy_device *phydev = upstream; if (phydev->attached_dev) phydev->attached_dev->sfp_bus = bus; phydev->sfp_bus_attached = true; } EXPORT_SYMBOL(phy_sfp_attach); /** * phy_sfp_detach - detach the SFP bus from the PHY upstream network device * @upstream: pointer to the phy device * @bus: sfp bus representing cage being attached * * This is used to fill in the sfp_upstream_ops .detach member. */ void phy_sfp_detach(void *upstream, struct sfp_bus *bus) { struct phy_device *phydev = upstream; if (phydev->attached_dev) phydev->attached_dev->sfp_bus = NULL; phydev->sfp_bus_attached = false; } EXPORT_SYMBOL(phy_sfp_detach); /** * phy_sfp_probe - probe for a SFP cage attached to this PHY device * @phydev: Pointer to phy_device * @ops: SFP's upstream operations */ int phy_sfp_probe(struct phy_device *phydev, const struct sfp_upstream_ops *ops) { struct sfp_bus *bus; int ret = 0; if (phydev->mdio.dev.fwnode) { bus = sfp_bus_find_fwnode(phydev->mdio.dev.fwnode); if (IS_ERR(bus)) return PTR_ERR(bus); phydev->sfp_bus = bus; ret = sfp_bus_add_upstream(bus, phydev, ops); sfp_bus_put(bus); } return ret; } EXPORT_SYMBOL(phy_sfp_probe); static bool phy_drv_supports_irq(const struct phy_driver *phydrv) { return phydrv->config_intr && phydrv->handle_interrupt; } /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach * @phydev: Pointer to phy_device to attach * @flags: PHY device's dev_flags * @interface: PHY device's interface * * Description: Called by drivers to attach to a particular PHY * device. The phy_device is found, and properly hooked up * to the phy_driver. If no driver is attached, then a * generic driver is used. The phy_device is given a ptr to * the attaching device, and given a callback for link status * change. The phy_device is returned to the attaching driver. * This function takes a reference on the phy device. */ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) { struct mii_bus *bus = phydev->mdio.bus; struct device *d = &phydev->mdio.dev; struct module *ndev_owner = NULL; bool using_genphy = false; int err; /* For Ethernet device drivers that register their own MDIO bus, we * will have bus->owner match ndev_mod, so we do not want to increment * our own module->refcnt here, otherwise we would not be able to * unload later on. */ if (dev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner && !try_module_get(bus->owner)) { phydev_err(phydev, "failed to get the bus module\n"); return -EIO; } get_device(d); /* Assume that if there is no driver, that it doesn't * exist, and we should use the genphy driver. */ if (!d->driver) { if (phydev->is_c45) d->driver = &genphy_c45_driver.mdiodrv.driver; else d->driver = &genphy_driver.mdiodrv.driver; using_genphy = true; } if (!try_module_get(d->driver->owner)) { phydev_err(phydev, "failed to get the device driver module\n"); err = -EIO; goto error_put_device; } if (using_genphy) { err = d->driver->probe(d); if (err >= 0) err = device_bind_driver(d); if (err) goto error_module_put; } if (phydev->attached_dev) { dev_err(&dev->dev, "PHY already attached\n"); err = -EBUSY; goto error; } phydev->phy_link_change = phy_link_change; if (dev) { phydev->attached_dev = dev; dev->phydev = phydev; if (phydev->sfp_bus_attached) dev->sfp_bus = phydev->sfp_bus; } /* Some Ethernet drivers try to connect to a PHY device before * calling register_netdevice() -> netdev_register_kobject() and * does the dev->dev.kobj initialization. Here we only check for * success which indicates that the network device kobject is * ready. Once we do that we still need to keep track of whether * links were successfully set up or not for phy_detach() to * remove them accordingly. */ phydev->sysfs_links = false; phy_sysfs_create_links(phydev); if (!phydev->attached_dev) { err = sysfs_create_file(&phydev->mdio.dev.kobj, &dev_attr_phy_standalone.attr); if (err) phydev_err(phydev, "error creating 'phy_standalone' sysfs entry\n"); } phydev->dev_flags |= flags; phydev->interface = interface; phydev->state = PHY_READY; phydev->interrupts = PHY_INTERRUPT_DISABLED; /* PHYs can request to use poll mode even though they have an * associated interrupt line. This could be the case if they * detect a broken interrupt handling. */ if (phydev->dev_flags & PHY_F_NO_IRQ) phydev->irq = PHY_POLL; if (!phy_drv_supports_irq(phydev->drv) && phy_interrupt_is_valid(phydev)) phydev->irq = PHY_POLL; /* Port is set to PORT_TP by default and the actual PHY driver will set * it to different value depending on the PHY configuration. If we have * the generic PHY driver we can't figure it out, thus set the old * legacy PORT_MII value. */ if (using_genphy) phydev->port = PORT_MII; /* Initial carrier state is off as the phy is about to be * (re)initialized. */ if (dev) netif_carrier_off(phydev->attached_dev); /* Do initial configuration here, now that * we have certain key parameters * (dev_flags and interface) */ err = phy_init_hw(phydev); if (err) goto error; phy_resume(phydev); if (!phydev->is_on_sfp_module) phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by * another mac interface, so we should create a device link between * phy dev and mac dev. */ if (dev && phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent) phydev->devlink = device_link_add(dev->dev.parent, &phydev->mdio.dev, DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); return err; error: /* phy_detach() does all of the cleanup below */ phy_detach(phydev); return err; error_module_put: module_put(d->driver->owner); d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) module_put(bus->owner); return err; } EXPORT_SYMBOL(phy_attach_direct); /** * phy_attach - attach a network device to a particular PHY device * @dev: network device to attach * @bus_id: Bus ID of PHY device to attach * @interface: PHY device's interface * * Description: Same as phy_attach_direct() except that a PHY bus_id * string is passed instead of a pointer to a struct phy_device. */ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface) { struct phy_device *phydev; struct device *d; int rc; if (!dev) return ERR_PTR(-EINVAL); /* Search the list of PHY devices on the mdio bus for the * PHY with the requested name */ d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id); if (!d) { pr_err("PHY %s not found\n", bus_id); return ERR_PTR(-ENODEV); } phydev = to_phy_device(d); rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); put_device(d); if (rc) return ERR_PTR(rc); return phydev; } EXPORT_SYMBOL(phy_attach); static bool phy_driver_is_genphy_kind(struct phy_device *phydev, struct device_driver *driver) { struct device *d = &phydev->mdio.dev; bool ret = false; if (!phydev->drv) return ret; get_device(d); ret = d->driver == driver; put_device(d); return ret; } bool phy_driver_is_genphy(struct phy_device *phydev) { return phy_driver_is_genphy_kind(phydev, &genphy_driver.mdiodrv.driver); } EXPORT_SYMBOL_GPL(phy_driver_is_genphy); bool phy_driver_is_genphy_10g(struct phy_device *phydev) { return phy_driver_is_genphy_kind(phydev, &genphy_c45_driver.mdiodrv.driver); } EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g); /** * phy_package_join - join a common PHY group * @phydev: target phy_device struct * @base_addr: cookie and base PHY address of PHY package for offset * calculation of global register access * @priv_size: if non-zero allocate this amount of bytes for private data * * This joins a PHY group and provides a shared storage for all phydevs in * this group. This is intended to be used for packages which contain * more than one PHY, for example a quad PHY transceiver. * * The base_addr parameter serves as cookie which has to have the same values * for all members of one group and as the base PHY address of the PHY package * for offset calculation to access generic registers of a PHY package. * Usually, one of the PHY addresses of the different PHYs in the package * provides access to these global registers. * The address which is given here, will be used in the phy_package_read() * and phy_package_write() convenience functions as base and added to the * passed offset in those functions. * * This will set the shared pointer of the phydev to the shared storage. * If this is the first call for a this cookie the shared storage will be * allocated. If priv_size is non-zero, the given amount of bytes are * allocated for the priv member. * * Returns < 1 on error, 0 on success. Esp. calling phy_package_join() * with the same cookie but a different priv_size is an error. */ int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size) { struct mii_bus *bus = phydev->mdio.bus; struct phy_package_shared *shared; int ret; if (base_addr < 0 || base_addr >= PHY_MAX_ADDR) return -EINVAL; mutex_lock(&bus->shared_lock); shared = bus->shared[base_addr]; if (!shared) { ret = -ENOMEM; shared = kzalloc(sizeof(*shared), GFP_KERNEL); if (!shared) goto err_unlock; if (priv_size) { shared->priv = kzalloc(priv_size, GFP_KERNEL); if (!shared->priv) goto err_free; shared->priv_size = priv_size; } shared->base_addr = base_addr; shared->np = NULL; refcount_set(&shared->refcnt, 1); bus->shared[base_addr] = shared; } else { ret = -EINVAL; if (priv_size && priv_size != shared->priv_size) goto err_unlock; refcount_inc(&shared->refcnt); } mutex_unlock(&bus->shared_lock); phydev->shared = shared; return 0; err_free: kfree(shared); err_unlock: mutex_unlock(&bus->shared_lock); return ret; } EXPORT_SYMBOL_GPL(phy_package_join); /** * of_phy_package_join - join a common PHY group in PHY package * @phydev: target phy_device struct * @priv_size: if non-zero allocate this amount of bytes for private data * * This is a variant of phy_package_join for PHY package defined in DT. * * The parent node of the @phydev is checked as a valid PHY package node * structure (by matching the node name "ethernet-phy-package") and the * base_addr for the PHY package is passed to phy_package_join. * * With this configuration the shared struct will also have the np value * filled to use additional DT defined properties in PHY specific * probe_once and config_init_once PHY package OPs. * * Returns < 0 on error, 0 on success. Esp. calling phy_package_join() * with the same cookie but a different priv_size is an error. Or a parent * node is not detected or is not valid or doesn't match the expected node * name for PHY package. */ int of_phy_package_join(struct phy_device *phydev, size_t priv_size) { struct device_node *node = phydev->mdio.dev.of_node; struct device_node *package_node; u32 base_addr; int ret; if (!node) return -EINVAL; package_node = of_get_parent(node); if (!package_node) return -EINVAL; if (!of_node_name_eq(package_node, "ethernet-phy-package")) { ret = -EINVAL; goto exit; } if (of_property_read_u32(package_node, "reg", &base_addr)) { ret = -EINVAL; goto exit; } ret = phy_package_join(phydev, base_addr, priv_size); if (ret) goto exit; phydev->shared->np = package_node; return 0; exit: of_node_put(package_node); return ret; } EXPORT_SYMBOL_GPL(of_phy_package_join); /** * phy_package_leave - leave a common PHY group * @phydev: target phy_device struct * * This leaves a PHY group created by phy_package_join(). If this phydev * was the last user of the shared data between the group, this data is * freed. Resets the phydev->shared pointer to NULL. */ void phy_package_leave(struct phy_device *phydev) { struct phy_package_shared *shared = phydev->shared; struct mii_bus *bus = phydev->mdio.bus; if (!shared) return; /* Decrease the node refcount on leave if present */ if (shared->np) of_node_put(shared->np); if (refcount_dec_and_mutex_lock(&shared->refcnt, &bus->shared_lock)) { bus->shared[shared->base_addr] = NULL; mutex_unlock(&bus->shared_lock); kfree(shared->priv); kfree(shared); } phydev->shared = NULL; } EXPORT_SYMBOL_GPL(phy_package_leave); static void devm_phy_package_leave(struct device *dev, void *res) { phy_package_leave(*(struct phy_device **)res); } /** * devm_phy_package_join - resource managed phy_package_join() * @dev: device that is registering this PHY package * @phydev: target phy_device struct * @base_addr: cookie and base PHY address of PHY package for offset * calculation of global register access * @priv_size: if non-zero allocate this amount of bytes for private data * * Managed phy_package_join(). Shared storage fetched by this function, * phy_package_leave() is automatically called on driver detach. See * phy_package_join() for more information. */ int devm_phy_package_join(struct device *dev, struct phy_device *phydev, int base_addr, size_t priv_size) { struct phy_device **ptr; int ret; ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; ret = phy_package_join(phydev, base_addr, priv_size); if (!ret) { *ptr = phydev; devres_add(dev, ptr); } else { devres_free(ptr); } return ret; } EXPORT_SYMBOL_GPL(devm_phy_package_join); /** * devm_of_phy_package_join - resource managed of_phy_package_join() * @dev: device that is registering this PHY package * @phydev: target phy_device struct * @priv_size: if non-zero allocate this amount of bytes for private data * * Managed of_phy_package_join(). Shared storage fetched by this function, * phy_package_leave() is automatically called on driver detach. See * of_phy_package_join() for more information. */ int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, size_t priv_size) { struct phy_device **ptr; int ret; ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), GFP_KERNEL); if (!ptr) return -ENOMEM; ret = of_phy_package_join(phydev, priv_size); if (!ret) { *ptr = phydev; devres_add(dev, ptr); } else { devres_free(ptr); } return ret; } EXPORT_SYMBOL_GPL(devm_of_phy_package_join); /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct * * This detaches the phy device from its network device and the phy * driver, and drops the reference count taken in phy_attach_direct(). */ void phy_detach(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; struct module *ndev_owner = NULL; struct mii_bus *bus; if (phydev->devlink) device_link_del(phydev->devlink); if (phydev->sysfs_links) { if (dev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } if (!phydev->attached_dev) sysfs_remove_file(&phydev->mdio.dev.kobj, &dev_attr_phy_standalone.attr); phy_suspend(phydev); if (dev) { phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; } phydev->phylink = NULL; if (!phydev->is_on_sfp_module) phy_led_triggers_unregister(phydev); if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device * from the generic driver so that there's a chance a * real driver could be loaded */ if (phy_driver_is_genphy(phydev) || phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); /* Assert the reset signal */ phy_device_reset(phydev, 1); /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. */ bus = phydev->mdio.bus; put_device(&phydev->mdio.dev); if (dev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); } EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; struct net_device *netdev = phydev->attached_dev; const struct phy_driver *phydrv = phydev->drv; int ret; if (phydev->suspended || !phydrv) return 0; phy_ethtool_get_wol(phydev, &wol); phydev->wol_enabled = wol.wolopts || (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; if (!phydrv->suspend) return 0; ret = phydrv->suspend(phydev); if (!ret) phydev->suspended = true; return ret; } EXPORT_SYMBOL(phy_suspend); int __phy_resume(struct phy_device *phydev) { const struct phy_driver *phydrv = phydev->drv; int ret; lockdep_assert_held(&phydev->lock); if (!phydrv || !phydrv->resume) return 0; ret = phydrv->resume(phydev); if (!ret) phydev->suspended = false; return ret; } EXPORT_SYMBOL(__phy_resume); int phy_resume(struct phy_device *phydev) { int ret; mutex_lock(&phydev->lock); ret = __phy_resume(phydev); mutex_unlock(&phydev->lock); return ret; } EXPORT_SYMBOL(phy_resume); int phy_loopback(struct phy_device *phydev, bool enable) { int ret = 0; if (!phydev->drv) return -EIO; mutex_lock(&phydev->lock); if (enable && phydev->loopback_enabled) { ret = -EBUSY; goto out; } if (!enable && !phydev->loopback_enabled) { ret = -EINVAL; goto out; } if (phydev->drv->set_loopback) ret = phydev->drv->set_loopback(phydev, enable); else ret = genphy_loopback(phydev, enable); if (ret) goto out; phydev->loopback_enabled = enable; out: mutex_unlock(&phydev->lock); return ret; } EXPORT_SYMBOL(phy_loopback); /** * phy_reset_after_clk_enable - perform a PHY reset if needed * @phydev: target phy_device struct * * Description: Some PHYs are known to need a reset after their refclk was * enabled. This function evaluates the flags and perform the reset if it's * needed. Returns < 0 on error, 0 if the phy wasn't reset and 1 if the phy * was reset. */ int phy_reset_after_clk_enable(struct phy_device *phydev) { if (!phydev || !phydev->drv) return -ENODEV; if (phydev->drv->flags & PHY_RST_AFTER_CLK_EN) { phy_device_reset(phydev, 1); phy_device_reset(phydev, 0); return 1; } return 0; } EXPORT_SYMBOL(phy_reset_after_clk_enable); /* Generic PHY support and helper functions */ /** * genphy_config_advert - sanitize and advertise auto-negotiation parameters * @phydev: target phy_device struct * * Description: Writes MII_ADVERTISE with the appropriate values, * after sanitizing the values to make sure we only advertise * what is supported. Returns < 0 on error, 0 if the PHY's advertisement * hasn't changed, and > 0 if it has changed. */ static int genphy_config_advert(struct phy_device *phydev) { int err, bmsr, changed = 0; u32 adv; /* Only allow advertising what this PHY supports */ linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); adv = linkmode_adv_to_mii_adv_t(phydev->advertising); /* Setup standard advertisement */ err = phy_modify_changed(phydev, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, adv); if (err < 0) return err; if (err > 0) changed = 1; bmsr = phy_read(phydev, MII_BMSR); if (bmsr < 0) return bmsr; /* Per 802.3-2008, Section 22.2.4.2.16 Extended status all * 1000Mbits/sec capable PHYs shall have the BMSR_ESTATEN bit set to a * logical 1. */ if (!(bmsr & BMSR_ESTATEN)) return changed; adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising); err = phy_modify_changed(phydev, MII_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, adv); if (err < 0) return err; if (err > 0) changed = 1; return changed; } /** * genphy_c37_config_advert - sanitize and advertise auto-negotiation parameters * @phydev: target phy_device struct * * Description: Writes MII_ADVERTISE with the appropriate values, * after sanitizing the values to make sure we only advertise * what is supported. Returns < 0 on error, 0 if the PHY's advertisement * hasn't changed, and > 0 if it has changed. This function is intended * for Clause 37 1000Base-X mode. */ static int genphy_c37_config_advert(struct phy_device *phydev) { u16 adv = 0; /* Only allow advertising what this PHY supports */ linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, phydev->advertising)) adv |= ADVERTISE_1000XFULL; if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->advertising)) adv |= ADVERTISE_1000XPAUSE; if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->advertising)) adv |= ADVERTISE_1000XPSE_ASYM; return phy_modify_changed(phydev, MII_ADVERTISE, ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | ADVERTISE_1000XHALF | ADVERTISE_1000XPSE_ASYM, adv); } /** * genphy_config_eee_advert - disable unwanted eee mode advertisement * @phydev: target phy_device struct * * Description: Writes MDIO_AN_EEE_ADV after disabling unsupported energy * efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't * changed, and 1 if it has changed. */ int genphy_config_eee_advert(struct phy_device *phydev) { int err; /* Nothing to disable */ if (!phydev->eee_broken_modes) return 0; err = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, phydev->eee_broken_modes, 0); /* If the call failed, we assume that EEE is not supported */ return err < 0 ? 0 : err; } EXPORT_SYMBOL(genphy_config_eee_advert); /** * genphy_setup_forced - configures/forces speed/duplex from @phydev * @phydev: target phy_device struct * * Description: Configures MII_BMCR to force speed/duplex * to the values in phydev. Assumes that the values are valid. * Please see phy_sanitize_settings(). */ int genphy_setup_forced(struct phy_device *phydev) { u16 ctl; phydev->pause = 0; phydev->asym_pause = 0; ctl = mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); return phy_modify(phydev, MII_BMCR, ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl); } EXPORT_SYMBOL(genphy_setup_forced); static int genphy_setup_master_slave(struct phy_device *phydev) { u16 ctl = 0; if (!phydev->is_gigabit_capable) return 0; switch (phydev->master_slave_set) { case MASTER_SLAVE_CFG_MASTER_PREFERRED: ctl |= CTL1000_PREFER_MASTER; break; case MASTER_SLAVE_CFG_SLAVE_PREFERRED: break; case MASTER_SLAVE_CFG_MASTER_FORCE: ctl |= CTL1000_AS_MASTER; fallthrough; case MASTER_SLAVE_CFG_SLAVE_FORCE: ctl |= CTL1000_ENABLE_MASTER; break; case MASTER_SLAVE_CFG_UNKNOWN: case MASTER_SLAVE_CFG_UNSUPPORTED: return 0; default: phydev_warn(phydev, "Unsupported Master/Slave mode\n"); return -EOPNOTSUPP; } return phy_modify_changed(phydev, MII_CTRL1000, (CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER | CTL1000_PREFER_MASTER), ctl); } int genphy_read_master_slave(struct phy_device *phydev) { int cfg, state; int val; phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; val = phy_read(phydev, MII_CTRL1000); if (val < 0) return val; if (val & CTL1000_ENABLE_MASTER) { if (val & CTL1000_AS_MASTER) cfg = MASTER_SLAVE_CFG_MASTER_FORCE; else cfg = MASTER_SLAVE_CFG_SLAVE_FORCE; } else { if (val & CTL1000_PREFER_MASTER) cfg = MASTER_SLAVE_CFG_MASTER_PREFERRED; else cfg = MASTER_SLAVE_CFG_SLAVE_PREFERRED; } val = phy_read(phydev, MII_STAT1000); if (val < 0) return val; if (val & LPA_1000MSFAIL) { state = MASTER_SLAVE_STATE_ERR; } else if (phydev->link) { /* this bits are valid only for active link */ if (val & LPA_1000MSRES) state = MASTER_SLAVE_STATE_MASTER; else state = MASTER_SLAVE_STATE_SLAVE; } else { state = MASTER_SLAVE_STATE_UNKNOWN; } phydev->master_slave_get = cfg; phydev->master_slave_state = state; return 0; } EXPORT_SYMBOL(genphy_read_master_slave); /** * genphy_restart_aneg - Enable and Restart Autonegotiation * @phydev: target phy_device struct */ int genphy_restart_aneg(struct phy_device *phydev) { /* Don't isolate the PHY if we're negotiating */ return phy_modify(phydev, MII_BMCR, BMCR_ISOLATE, BMCR_ANENABLE | BMCR_ANRESTART); } EXPORT_SYMBOL(genphy_restart_aneg); /** * genphy_check_and_restart_aneg - Enable and restart auto-negotiation * @phydev: target phy_device struct * @restart: whether aneg restart is requested * * Check, and restart auto-negotiation if needed. */ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) { int ret; if (!restart) { /* Advertisement hasn't changed, but maybe aneg was never on to * begin with? Or maybe phy was isolated? */ ret = phy_read(phydev, MII_BMCR); if (ret < 0) return ret; if (!(ret & BMCR_ANENABLE) || (ret & BMCR_ISOLATE)) restart = true; } if (restart) return genphy_restart_aneg(phydev); return 0; } EXPORT_SYMBOL(genphy_check_and_restart_aneg); /** * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct * @changed: whether autoneg is requested * * Description: If auto-negotiation is enabled, we configure the * advertising, and then restart auto-negotiation. If it is not * enabled, then we write the BMCR. */ int __genphy_config_aneg(struct phy_device *phydev, bool changed) { int err; err = genphy_c45_an_config_eee_aneg(phydev); if (err < 0) return err; else if (err) changed = true; err = genphy_setup_master_slave(phydev); if (err < 0) return err; else if (err) changed = true; if (AUTONEG_ENABLE != phydev->autoneg) return genphy_setup_forced(phydev); err = genphy_config_advert(phydev); if (err < 0) /* error */ return err; else if (err) changed = true; return genphy_check_and_restart_aneg(phydev, changed); } EXPORT_SYMBOL(__genphy_config_aneg); /** * genphy_c37_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct * * Description: If auto-negotiation is enabled, we configure the * advertising, and then restart auto-negotiation. If it is not * enabled, then we write the BMCR. This function is intended * for use with Clause 37 1000Base-X mode. */ int genphy_c37_config_aneg(struct phy_device *phydev) { int err, changed; if (phydev->autoneg != AUTONEG_ENABLE) return genphy_setup_forced(phydev); err = phy_modify(phydev, MII_BMCR, BMCR_SPEED1000 | BMCR_SPEED100, BMCR_SPEED1000); if (err) return err; changed = genphy_c37_config_advert(phydev); if (changed < 0) /* error */ return changed; if (!changed) { /* Advertisement hasn't changed, but maybe aneg was never on to * begin with? Or maybe phy was isolated? */ int ctl = phy_read(phydev, MII_BMCR); if (ctl < 0) return ctl; if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) changed = 1; /* do restart aneg */ } /* Only restart aneg if we are advertising something different * than we were before. */ if (changed > 0) return genphy_restart_aneg(phydev); return 0; } EXPORT_SYMBOL(genphy_c37_config_aneg); /** * genphy_aneg_done - return auto-negotiation status * @phydev: target phy_device struct * * Description: Reads the status register and returns 0 either if * auto-negotiation is incomplete, or if there was an error. * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. */ int genphy_aneg_done(struct phy_device *phydev) { int retval = phy_read(phydev, MII_BMSR); return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); } EXPORT_SYMBOL(genphy_aneg_done); /** * genphy_update_link - update link status in @phydev * @phydev: target phy_device struct * * Description: Update the value in phydev->link to reflect the * current link value. In order to do this, we need to read * the status register twice, keeping the second value. */ int genphy_update_link(struct phy_device *phydev) { int status = 0, bmcr; bmcr = phy_read(phydev, MII_BMCR); if (bmcr < 0) return bmcr; /* Autoneg is being started, therefore disregard BMSR value and * report link as down. */ if (bmcr & BMCR_ANRESTART) goto done; /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status * in polling mode to detect such short link drops except * the link was already down. */ if (!phy_polling_mode(phydev) || !phydev->link) { status = phy_read(phydev, MII_BMSR); if (status < 0) return status; else if (status & BMSR_LSTATUS) goto done; } /* Read link and autonegotiation status */ status = phy_read(phydev, MII_BMSR); if (status < 0) return status; done: phydev->link = status & BMSR_LSTATUS ? 1 : 0; phydev->autoneg_complete = status & BMSR_ANEGCOMPLETE ? 1 : 0; /* Consider the case that autoneg was started and "aneg complete" * bit has been reset, but "link up" bit not yet. */ if (phydev->autoneg == AUTONEG_ENABLE && !phydev->autoneg_complete) phydev->link = 0; return 0; } EXPORT_SYMBOL(genphy_update_link); int genphy_read_lpa(struct phy_device *phydev) { int lpa, lpagb; if (phydev->autoneg == AUTONEG_ENABLE) { if (!phydev->autoneg_complete) { mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, 0); mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0); return 0; } if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) return lpagb; if (lpagb & LPA_1000MSFAIL) { int adv = phy_read(phydev, MII_CTRL1000); if (adv < 0) return adv; if (adv & CTL1000_ENABLE_MASTER) phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n"); else phydev_err(phydev, "Master/Slave resolution failed\n"); return -ENOLINK; } mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb); } lpa = phy_read(phydev, MII_LPA); if (lpa < 0) return lpa; mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); } else { linkmode_zero(phydev->lp_advertising); } return 0; } EXPORT_SYMBOL(genphy_read_lpa); /** * genphy_read_status_fixed - read the link parameters for !aneg mode * @phydev: target phy_device struct * * Read the current duplex and speed state for a PHY operating with * autonegotiation disabled. */ int genphy_read_status_fixed(struct phy_device *phydev) { int bmcr = phy_read(phydev, MII_BMCR); if (bmcr < 0) return bmcr; if (bmcr & BMCR_FULLDPLX) phydev->duplex = DUPLEX_FULL; else phydev->duplex = DUPLEX_HALF; if (bmcr & BMCR_SPEED1000) phydev->speed = SPEED_1000; else if (bmcr & BMCR_SPEED100) phydev->speed = SPEED_100; else phydev->speed = SPEED_10; return 0; } EXPORT_SYMBOL(genphy_read_status_fixed); /** * genphy_read_status - check the link status and update current link state * @phydev: target phy_device struct * * Description: Check the link, then figure out the current state * by comparing what we advertise with what the link partner * advertises. Start by checking the gigabit possibilities, * then move on to 10/100. */ int genphy_read_status(struct phy_device *phydev) { int err, old_link = phydev->link; /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); if (err) return err; /* why bother the PHY if nothing can have changed */ if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) return 0; phydev->master_slave_get = MASTER_SLAVE_CFG_UNSUPPORTED; phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; phydev->pause = 0; phydev->asym_pause = 0; if (phydev->is_gigabit_capable) { err = genphy_read_master_slave(phydev); if (err < 0) return err; } err = genphy_read_lpa(phydev); if (err < 0) return err; if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { err = genphy_read_status_fixed(phydev); if (err < 0) return err; } return 0; } EXPORT_SYMBOL(genphy_read_status); /** * genphy_c37_read_status - check the link status and update current link state * @phydev: target phy_device struct * @changed: pointer where to store if link changed * * Description: Check the link, then figure out the current state * by comparing what we advertise with what the link partner * advertises. This function is for Clause 37 1000Base-X mode. * * If link has changed, @changed is set to true, false otherwise. */ int genphy_c37_read_status(struct phy_device *phydev, bool *changed) { int lpa, err, old_link = phydev->link; /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); if (err) return err; /* why bother the PHY if nothing can have changed */ if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) { *changed = false; return 0; } /* Signal link has changed */ *changed = true; phydev->duplex = DUPLEX_UNKNOWN; phydev->pause = 0; phydev->asym_pause = 0; if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { lpa = phy_read(phydev, MII_LPA); if (lpa < 0) return lpa; linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->lp_advertising, lpa & LPA_LPACK); linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, phydev->lp_advertising, lpa & LPA_1000XFULL); linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->lp_advertising, lpa & LPA_1000XPAUSE); linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->lp_advertising, lpa & LPA_1000XPAUSE_ASYM); phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { int bmcr = phy_read(phydev, MII_BMCR); if (bmcr < 0) return bmcr; if (bmcr & BMCR_FULLDPLX) phydev->duplex = DUPLEX_FULL; else phydev->duplex = DUPLEX_HALF; } return 0; } EXPORT_SYMBOL(genphy_c37_read_status); /** * genphy_soft_reset - software reset the PHY via BMCR_RESET bit * @phydev: target phy_device struct * * Description: Perform a software PHY reset using the standard * BMCR_RESET bit and poll for the reset bit to be cleared. * * Returns: 0 on success, < 0 on failure */ int genphy_soft_reset(struct phy_device *phydev) { u16 res = BMCR_RESET; int ret; if (phydev->autoneg == AUTONEG_ENABLE) res |= BMCR_ANRESTART; ret = phy_modify(phydev, MII_BMCR, BMCR_ISOLATE, res); if (ret < 0) return ret; /* Clause 22 states that setting bit BMCR_RESET sets control registers * to their default value. Therefore the POWER DOWN bit is supposed to * be cleared after soft reset. */ phydev->suspended = 0; ret = phy_poll_reset(phydev); if (ret) return ret; /* BMCR may be reset to defaults */ if (phydev->autoneg == AUTONEG_DISABLE) ret = genphy_setup_forced(phydev); return ret; } EXPORT_SYMBOL(genphy_soft_reset); irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev) { /* It seems there are cases where the interrupts are handled by another * entity (ie an IRQ controller embedded inside the PHY) and do not * need any other interraction from phylib. In this case, just trigger * the state machine directly. */ phy_trigger_machine(phydev); return 0; } EXPORT_SYMBOL(genphy_handle_interrupt_no_ack); /** * genphy_read_abilities - read PHY abilities from Clause 22 registers * @phydev: target phy_device struct * * Description: Reads the PHY's abilities and populates * phydev->supported accordingly. * * Returns: 0 on success, < 0 on failure */ int genphy_read_abilities(struct phy_device *phydev) { int val; linkmode_set_bit_array(phy_basic_ports_array, ARRAY_SIZE(phy_basic_ports_array), phydev->supported); val = phy_read(phydev, MII_BMSR); if (val < 0) return val; linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported, val & BMSR_ANEGCAPABLE); linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, phydev->supported, val & BMSR_100FULL); linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, phydev->supported, val & BMSR_100HALF); linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, phydev->supported, val & BMSR_10FULL); linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, phydev->supported, val & BMSR_10HALF); if (val & BMSR_ESTATEN) { val = phy_read(phydev, MII_ESTATUS); if (val < 0) return val; linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, phydev->supported, val & ESTATUS_1000_TFULL); linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, phydev->supported, val & ESTATUS_1000_THALF); linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, phydev->supported, val & ESTATUS_1000_XFULL); } /* This is optional functionality. If not supported, we may get an error * which should be ignored. */ genphy_c45_read_eee_abilities(phydev); return 0; } EXPORT_SYMBOL(genphy_read_abilities); /* This is used for the phy device which doesn't support the MMD extended * register access, but it does have side effect when we are trying to access * the MMD register via indirect method. */ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, u16 regnum) { return -EOPNOTSUPP; } EXPORT_SYMBOL(genphy_read_mmd_unsupported); int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, u16 regnum, u16 val) { return -EOPNOTSUPP; } EXPORT_SYMBOL(genphy_write_mmd_unsupported); int genphy_suspend(struct phy_device *phydev) { return phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN); } EXPORT_SYMBOL(genphy_suspend); int genphy_resume(struct phy_device *phydev) { return phy_clear_bits(phydev, MII_BMCR, BMCR_PDOWN); } EXPORT_SYMBOL(genphy_resume); int genphy_loopback(struct phy_device *phydev, bool enable) { if (enable) { u16 ctl = BMCR_LOOPBACK; int ret, val; ctl |= mii_bmcr_encode_fixed(phydev->speed, phydev->duplex); phy_modify(phydev, MII_BMCR, ~0, ctl); ret = phy_read_poll_timeout(phydev, MII_BMSR, val, val & BMSR_LSTATUS, 5000, 500000, true); if (ret) return ret; } else { phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0); phy_config_aneg(phydev); } return 0; } EXPORT_SYMBOL(genphy_loopback); /** * phy_remove_link_mode - Remove a supported link mode * @phydev: phy_device structure to remove link mode from * @link_mode: Link mode to be removed * * Description: Some MACs don't support all link modes which the PHY * does. e.g. a 1G MAC often does not support 1000Half. Add a helper * to remove a link mode. */ void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode) { linkmode_clear_bit(link_mode, phydev->supported); phy_advertise_supported(phydev); } EXPORT_SYMBOL(phy_remove_link_mode); static void phy_copy_pause_bits(unsigned long *dst, unsigned long *src) { linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, dst, linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, src)); linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, dst, linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, src)); } /** * phy_advertise_supported - Advertise all supported modes * @phydev: target phy_device struct * * Description: Called to advertise all supported modes, doesn't touch * pause mode advertising. */ void phy_advertise_supported(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(new); linkmode_copy(new, phydev->supported); phy_copy_pause_bits(new, phydev->advertising); linkmode_copy(phydev->advertising, new); } EXPORT_SYMBOL(phy_advertise_supported); /** * phy_advertise_eee_all - Advertise all supported EEE modes * @phydev: target phy_device struct * * Description: Per default phylib preserves the EEE advertising at the time of * phy probing, which might be a subset of the supported EEE modes. Use this * function when all supported EEE modes should be advertised. This does not * trigger auto-negotiation, so must be called before phy_start()/ * phylink_start() which will start auto-negotiation. */ void phy_advertise_eee_all(struct phy_device *phydev) { linkmode_copy(phydev->advertising_eee, phydev->supported_eee); } EXPORT_SYMBOL_GPL(phy_advertise_eee_all); /** * phy_support_eee - Set initial EEE policy configuration * @phydev: Target phy_device struct * * This function configures the initial policy for Energy Efficient Ethernet * (EEE) on the specified PHY device, influencing that EEE capabilities are * advertised before the link is established. It should be called during PHY * registration by the MAC driver and/or the PHY driver (for SmartEEE PHYs) * if MAC supports LPI or PHY is capable to compensate missing LPI functionality * of the MAC. * * The function sets default EEE policy parameters, including preparing the PHY * to advertise EEE capabilities based on hardware support. * * It also sets the expected configuration for Low Power Idle (LPI) in the MAC * driver. If the PHY framework determines that both local and remote * advertisements support EEE, and the negotiated link mode is compatible with * EEE, it will set enable_tx_lpi = true. The MAC driver is expected to act on * this setting by enabling the LPI timer if enable_tx_lpi is set. */ void phy_support_eee(struct phy_device *phydev) { linkmode_copy(phydev->advertising_eee, phydev->supported_eee); phydev->eee_cfg.tx_lpi_enabled = true; phydev->eee_cfg.eee_enabled = true; } EXPORT_SYMBOL(phy_support_eee); /** * phy_support_sym_pause - Enable support of symmetrical pause * @phydev: target phy_device struct * * Description: Called by the MAC to indicate is supports symmetrical * Pause, but not asym pause. */ void phy_support_sym_pause(struct phy_device *phydev) { linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); phy_copy_pause_bits(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_support_sym_pause); /** * phy_support_asym_pause - Enable support of asym pause * @phydev: target phy_device struct * * Description: Called by the MAC to indicate is supports Asym Pause. */ void phy_support_asym_pause(struct phy_device *phydev) { phy_copy_pause_bits(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_support_asym_pause); /** * phy_set_sym_pause - Configure symmetric Pause * @phydev: target phy_device struct * @rx: Receiver Pause is supported * @tx: Transmit Pause is supported * @autoneg: Auto neg should be used * * Description: Configure advertised Pause support depending on if * receiver pause and pause auto neg is supported. Generally called * from the set_pauseparam .ndo. */ void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg) { linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); if (rx && tx && autoneg) linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); linkmode_copy(phydev->advertising, phydev->supported); } EXPORT_SYMBOL(phy_set_sym_pause); /** * phy_set_asym_pause - Configure Pause and Asym Pause * @phydev: target phy_device struct * @rx: Receiver Pause is supported * @tx: Transmit Pause is supported * * Description: Configure advertised Pause support depending on if * transmit and receiver pause is supported. If there has been a * change in adverting, trigger a new autoneg. Generally called from * the set_pauseparam .ndo. */ void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx) { __ETHTOOL_DECLARE_LINK_MODE_MASK(oldadv); linkmode_copy(oldadv, phydev->advertising); linkmode_set_pause(phydev->advertising, tx, rx); if (!linkmode_equal(oldadv, phydev->advertising) && phydev->autoneg) phy_start_aneg(phydev); } EXPORT_SYMBOL(phy_set_asym_pause); /** * phy_validate_pause - Test if the PHY/MAC support the pause configuration * @phydev: phy_device struct * @pp: requested pause configuration * * Description: Test if the PHY/MAC combination supports the Pause * configuration the user is requesting. Returns True if it is * supported, false otherwise. */ bool phy_validate_pause(struct phy_device *phydev, struct ethtool_pauseparam *pp) { if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) && pp->rx_pause) return false; if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported) && pp->rx_pause != pp->tx_pause) return false; return true; } EXPORT_SYMBOL(phy_validate_pause); /** * phy_get_pause - resolve negotiated pause modes * @phydev: phy_device struct * @tx_pause: pointer to bool to indicate whether transmit pause should be * enabled. * @rx_pause: pointer to bool to indicate whether receive pause should be * enabled. * * Resolve and return the flow control modes according to the negotiation * result. This includes checking that we are operating in full duplex mode. * See linkmode_resolve_pause() for further details. */ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause) { if (phydev->duplex != DUPLEX_FULL) { *tx_pause = false; *rx_pause = false; return; } return linkmode_resolve_pause(phydev->advertising, phydev->lp_advertising, tx_pause, rx_pause); } EXPORT_SYMBOL(phy_get_pause); #if IS_ENABLED(CONFIG_OF_MDIO) static int phy_get_int_delay_property(struct device *dev, const char *name) { s32 int_delay; int ret; ret = device_property_read_u32(dev, name, &int_delay); if (ret) return ret; return int_delay; } #else static int phy_get_int_delay_property(struct device *dev, const char *name) { return -EINVAL; } #endif /** * phy_get_internal_delay - returns the index of the internal delay * @phydev: phy_device struct * @dev: pointer to the devices device struct * @delay_values: array of delays the PHY supports * @size: the size of the delay array * @is_rx: boolean to indicate to get the rx internal delay * * Returns the index within the array of internal delay passed in. * If the device property is not present then the interface type is checked * if the interface defines use of internal delay then a 1 is returned otherwise * a 0 is returned. * The array must be in ascending order. If PHY does not have an ascending order * array then size = 0 and the value of the delay property is returned. * Return -EINVAL if the delay is invalid or cannot be found. */ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, const int *delay_values, int size, bool is_rx) { s32 delay; int i; if (is_rx) { delay = phy_get_int_delay_property(dev, "rx-internal-delay-ps"); if (delay < 0 && size == 0) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) return 1; else return 0; } } else { delay = phy_get_int_delay_property(dev, "tx-internal-delay-ps"); if (delay < 0 && size == 0) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) return 1; else return 0; } } if (delay < 0) return delay; if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { phydev_err(phydev, "Delay %d is out of range\n", delay); return -EINVAL; } if (delay == delay_values[0]) return 0; for (i = 1; i < size; i++) { if (delay == delay_values[i]) return i; /* Find an approximate index by looking up the table */ if (delay > delay_values[i - 1] && delay < delay_values[i]) { if (delay - delay_values[i - 1] < delay_values[i] - delay) return i - 1; else return i; } } phydev_err(phydev, "error finding internal delay index for %d\n", delay); return -EINVAL; } EXPORT_SYMBOL(phy_get_internal_delay); static int phy_led_set_brightness(struct led_classdev *led_cdev, enum led_brightness value) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; int err; mutex_lock(&phydev->lock); err = phydev->drv->led_brightness_set(phydev, phyled->index, value); mutex_unlock(&phydev->lock); return err; } static int phy_led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; int err; mutex_lock(&phydev->lock); err = phydev->drv->led_blink_set(phydev, phyled->index, delay_on, delay_off); mutex_unlock(&phydev->lock); return err; } static __maybe_unused struct device * phy_led_hw_control_get_device(struct led_classdev *led_cdev) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; if (phydev->attached_dev) return &phydev->attached_dev->dev; return NULL; } static int __maybe_unused phy_led_hw_control_get(struct led_classdev *led_cdev, unsigned long *rules) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; int err; mutex_lock(&phydev->lock); err = phydev->drv->led_hw_control_get(phydev, phyled->index, rules); mutex_unlock(&phydev->lock); return err; } static int __maybe_unused phy_led_hw_control_set(struct led_classdev *led_cdev, unsigned long rules) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; int err; mutex_lock(&phydev->lock); err = phydev->drv->led_hw_control_set(phydev, phyled->index, rules); mutex_unlock(&phydev->lock); return err; } static __maybe_unused int phy_led_hw_is_supported(struct led_classdev *led_cdev, unsigned long rules) { struct phy_led *phyled = to_phy_led(led_cdev); struct phy_device *phydev = phyled->phydev; int err; mutex_lock(&phydev->lock); err = phydev->drv->led_hw_is_supported(phydev, phyled->index, rules); mutex_unlock(&phydev->lock); return err; } static void phy_leds_unregister(struct phy_device *phydev) { struct phy_led *phyled; list_for_each_entry(phyled, &phydev->leds, list) { led_classdev_unregister(&phyled->led_cdev); } } static int of_phy_led(struct phy_device *phydev, struct device_node *led) { struct device *dev = &phydev->mdio.dev; struct led_init_data init_data = {}; struct led_classdev *cdev; unsigned long modes = 0; struct phy_led *phyled; u32 index; int err; phyled = devm_kzalloc(dev, sizeof(*phyled), GFP_KERNEL); if (!phyled) return -ENOMEM; cdev = &phyled->led_cdev; phyled->phydev = phydev; err = of_property_read_u32(led, "reg", &index); if (err) return err; if (index > U8_MAX) return -EINVAL; if (of_property_read_bool(led, "active-low")) set_bit(PHY_LED_ACTIVE_LOW, &modes); if (of_property_read_bool(led, "inactive-high-impedance")) set_bit(PHY_LED_INACTIVE_HIGH_IMPEDANCE, &modes); if (modes) { /* Return error if asked to set polarity modes but not supported */ if (!phydev->drv->led_polarity_set) return -EINVAL; err = phydev->drv->led_polarity_set(phydev, index, modes); if (err) return err; } phyled->index = index; if (phydev->drv->led_brightness_set) cdev->brightness_set_blocking = phy_led_set_brightness; if (phydev->drv->led_blink_set) cdev->blink_set = phy_led_blink_set; #ifdef CONFIG_LEDS_TRIGGERS if (phydev->drv->led_hw_is_supported && phydev->drv->led_hw_control_set && phydev->drv->led_hw_control_get) { cdev->hw_control_is_supported = phy_led_hw_is_supported; cdev->hw_control_set = phy_led_hw_control_set; cdev->hw_control_get = phy_led_hw_control_get; cdev->hw_control_trigger = "netdev"; } cdev->hw_control_get_device = phy_led_hw_control_get_device; #endif cdev->max_brightness = 1; init_data.devicename = dev_name(&phydev->mdio.dev); init_data.fwnode = of_fwnode_handle(led); init_data.devname_mandatory = true; err = led_classdev_register_ext(dev, cdev, &init_data); if (err) return err; list_add(&phyled->list, &phydev->leds); return 0; } static int of_phy_leds(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; struct device_node *leds, *led; int err; if (!IS_ENABLED(CONFIG_OF_MDIO)) return 0; if (!node) return 0; leds = of_get_child_by_name(node, "leds"); if (!leds) return 0; for_each_available_child_of_node(leds, led) { err = of_phy_led(phydev, led); if (err) { of_node_put(led); phy_leds_unregister(phydev); return err; } } return 0; } /** * fwnode_mdio_find_device - Given a fwnode, find the mdio_device * @fwnode: pointer to the mdio_device's fwnode * * If successful, returns a pointer to the mdio_device with the embedded * struct device refcount incremented by one, or NULL on failure. * The caller should call put_device() on the mdio_device after its use. */ struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode) { struct device *d; if (!fwnode) return NULL; d = bus_find_device_by_fwnode(&mdio_bus_type, fwnode); if (!d) return NULL; return to_mdio_device(d); } EXPORT_SYMBOL(fwnode_mdio_find_device); /** * fwnode_phy_find_device - For provided phy_fwnode, find phy_device. * * @phy_fwnode: Pointer to the phy's fwnode. * * If successful, returns a pointer to the phy_device with the embedded * struct device refcount incremented by one, or NULL on failure. */ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) { struct mdio_device *mdiodev; mdiodev = fwnode_mdio_find_device(phy_fwnode); if (!mdiodev) return NULL; if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) return to_phy_device(&mdiodev->dev); put_device(&mdiodev->dev); return NULL; } EXPORT_SYMBOL(fwnode_phy_find_device); /** * device_phy_find_device - For the given device, get the phy_device * @dev: Pointer to the given device * * Refer return conditions of fwnode_phy_find_device(). */ struct phy_device *device_phy_find_device(struct device *dev) { return fwnode_phy_find_device(dev_fwnode(dev)); } EXPORT_SYMBOL_GPL(device_phy_find_device); /** * fwnode_get_phy_node - Get the phy_node using the named reference. * @fwnode: Pointer to fwnode from which phy_node has to be obtained. * * Refer return conditions of fwnode_find_reference(). * For ACPI, only "phy-handle" is supported. Legacy DT properties "phy" * and "phy-device" are not supported in ACPI. DT supports all the three * named references to the phy node. */ struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode) { struct fwnode_handle *phy_node; /* Only phy-handle is used for ACPI */ phy_node = fwnode_find_reference(fwnode, "phy-handle", 0); if (is_acpi_node(fwnode) || !IS_ERR(phy_node)) return phy_node; phy_node = fwnode_find_reference(fwnode, "phy", 0); if (IS_ERR(phy_node)) phy_node = fwnode_find_reference(fwnode, "phy-device", 0); return phy_node; } EXPORT_SYMBOL_GPL(fwnode_get_phy_node); /** * phy_probe - probe and init a PHY device * @dev: device to probe and init * * Take care of setting up the phy_device structure, set the state to READY. */ static int phy_probe(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); struct device_driver *drv = phydev->mdio.dev.driver; struct phy_driver *phydrv = to_phy_driver(drv); int err = 0; phydev->drv = phydrv; /* Disable the interrupt if the PHY doesn't support it * but the interrupt is still a valid one */ if (!phy_drv_supports_irq(phydrv) && phy_interrupt_is_valid(phydev)) phydev->irq = PHY_POLL; if (phydrv->flags & PHY_IS_INTERNAL) phydev->is_internal = true; /* Deassert the reset signal */ phy_device_reset(phydev, 0); if (phydev->drv->probe) { err = phydev->drv->probe(phydev); if (err) goto out; } phy_disable_interrupts(phydev); /* Start out supporting everything. Eventually, * a controller will attach, and may modify one * or both of these values */ if (phydrv->features) { linkmode_copy(phydev->supported, phydrv->features); genphy_c45_read_eee_abilities(phydev); } else if (phydrv->get_features) err = phydrv->get_features(phydev); else if (phydev->is_c45) err = genphy_c45_pma_read_abilities(phydev); else err = genphy_read_abilities(phydev); if (err) goto out; if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->supported)) phydev->autoneg = 0; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, phydev->supported)) phydev->is_gigabit_capable = 1; if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, phydev->supported)) phydev->is_gigabit_capable = 1; of_set_phy_supported(phydev); phy_advertise_supported(phydev); /* Get PHY default EEE advertising modes and handle them as potentially * safe initial configuration. */ err = genphy_c45_read_eee_adv(phydev, phydev->advertising_eee); if (err) goto out; /* There is no "enabled" flag. If PHY is advertising, assume it is * kind of enabled. */ phydev->eee_enabled = !linkmode_empty(phydev->advertising_eee); /* Some PHYs may advertise, by default, not support EEE modes. So, * we need to clean them. */ if (phydev->eee_enabled) linkmode_and(phydev->advertising_eee, phydev->supported_eee, phydev->advertising_eee); /* Get the EEE modes we want to prohibit. We will ask * the PHY stop advertising these mode later on */ of_set_phy_eee_broken(phydev); /* The Pause Frame bits indicate that the PHY can support passing * pause frames. During autonegotiation, the PHYs will determine if * they should allow pause frames to pass. The MAC driver should then * use that result to determine whether to enable flow control via * pause frames. * * Normally, PHY drivers should not set the Pause bits, and instead * allow phylib to do that. However, there may be some situations * (e.g. hardware erratum) where the driver wants to set only one * of these bits. */ if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) && !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) { linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); } /* Set the state to READY by default */ phydev->state = PHY_READY; /* Get the LEDs from the device tree, and instantiate standard * LEDs for them. */ if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) err = of_phy_leds(phydev); out: /* Re-assert the reset signal on error */ if (err) phy_device_reset(phydev, 1); return err; } static int phy_remove(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); cancel_delayed_work_sync(&phydev->state_queue); if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) phy_leds_unregister(phydev); phydev->state = PHY_DOWN; sfp_bus_del_upstream(phydev->sfp_bus); phydev->sfp_bus = NULL; if (phydev->drv && phydev->drv->remove) phydev->drv->remove(phydev); /* Assert the reset signal */ phy_device_reset(phydev, 1); phydev->drv = NULL; return 0; } /** * phy_driver_register - register a phy_driver with the PHY layer * @new_driver: new phy_driver to register * @owner: module owning this PHY */ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; /* Either the features are hard coded, or dynamically * determined. It cannot be both. */ if (WARN_ON(new_driver->features && new_driver->get_features)) { pr_err("%s: features and get_features must not both be set\n", new_driver->name); return -EINVAL; } /* PHYLIB device drivers must not match using a DT compatible table * as this bypasses our checks that the mdiodev that is being matched * is backed by a struct phy_device. If such a case happens, we will * make out-of-bounds accesses and lockup in phydev->lock. */ if (WARN(new_driver->mdiodrv.driver.of_match_table, "%s: driver must not provide a DT match table\n", new_driver->name)) return -EINVAL; new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; new_driver->mdiodrv.driver.name = new_driver->name; new_driver->mdiodrv.driver.bus = &mdio_bus_type; new_driver->mdiodrv.driver.probe = phy_probe; new_driver->mdiodrv.driver.remove = phy_remove; new_driver->mdiodrv.driver.owner = owner; new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS; retval = driver_register(&new_driver->mdiodrv.driver); if (retval) { pr_err("%s: Error %d in registering driver\n", new_driver->name, retval); return retval; } pr_debug("%s: Registered new driver\n", new_driver->name); return 0; } EXPORT_SYMBOL(phy_driver_register); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner) { int i, ret = 0; for (i = 0; i < n; i++) { ret = phy_driver_register(new_driver + i, owner); if (ret) { while (i-- > 0) phy_driver_unregister(new_driver + i); break; } } return ret; } EXPORT_SYMBOL(phy_drivers_register); void phy_driver_unregister(struct phy_driver *drv) { driver_unregister(&drv->mdiodrv.driver); } EXPORT_SYMBOL(phy_driver_unregister); void phy_drivers_unregister(struct phy_driver *drv, int n) { int i; for (i = 0; i < n; i++) phy_driver_unregister(drv + i); } EXPORT_SYMBOL(phy_drivers_unregister); static struct phy_driver genphy_driver = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", .get_features = genphy_read_abilities, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, }; static const struct ethtool_phy_ops phy_ethtool_phy_ops = { .get_sset_count = phy_ethtool_get_sset_count, .get_strings = phy_ethtool_get_strings, .get_stats = phy_ethtool_get_stats, .get_plca_cfg = phy_ethtool_get_plca_cfg, .set_plca_cfg = phy_ethtool_set_plca_cfg, .get_plca_status = phy_ethtool_get_plca_status, .start_cable_test = phy_start_cable_test, .start_cable_test_tdr = phy_start_cable_test_tdr, }; static const struct phylib_stubs __phylib_stubs = { .hwtstamp_get = __phy_hwtstamp_get, .hwtstamp_set = __phy_hwtstamp_set, }; static void phylib_register_stubs(void) { phylib_stubs = &__phylib_stubs; } static void phylib_unregister_stubs(void) { phylib_stubs = NULL; } static int __init phy_init(void) { int rc; rtnl_lock(); ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); phylib_register_stubs(); rtnl_unlock(); rc = mdio_bus_init(); if (rc) goto err_ethtool_phy_ops; features_init(); rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE); if (rc) goto err_mdio_bus; rc = phy_driver_register(&genphy_driver, THIS_MODULE); if (rc) goto err_c45; return 0; err_c45: phy_driver_unregister(&genphy_c45_driver); err_mdio_bus: mdio_bus_exit(); err_ethtool_phy_ops: rtnl_lock(); phylib_unregister_stubs(); ethtool_set_ethtool_phy_ops(NULL); rtnl_unlock(); return rc; } static void __exit phy_exit(void) { phy_driver_unregister(&genphy_c45_driver); phy_driver_unregister(&genphy_driver); mdio_bus_exit(); rtnl_lock(); phylib_unregister_stubs(); ethtool_set_ethtool_phy_ops(NULL); rtnl_unlock(); } subsys_initcall(phy_init); module_exit(phy_exit);
25 25 25 25 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 // SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. #include <linux/debugfs.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/udp_tunnel.h> #include "netdevsim.h" static int nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { if (ns->udp_ports.ports[table][entry]) { WARN(1, "entry already in use\n"); ret = -EBUSY; } else { ns->udp_ports.ports[table][entry] = be16_to_cpu(ti->port) << 16 | ti->type; } } netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; if (val == ns->udp_ports.ports[table][entry]) { ns->udp_ports.ports[table][entry] = 0; } else { WARN(1, "entry not installed %x vs %x\n", val, ns->udp_ports.ports[table][entry]); ret = -ENOENT; } } netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_info ti; unsigned int i; int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { udp_tunnel_nic_get_port(dev, table, i, &ti); ns->udp_ports.ports[table][i] = be16_to_cpu(ti.port) << 16 | ti.type; } return ret; } static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { .set_port = nsim_udp_tunnel_set_port, .unset_port = nsim_udp_tunnel_unset_port, .sync_table = nsim_udp_tunnel_sync_table, .tables = { { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | UDP_TUNNEL_TYPE_VXLAN_GPE, }, }, }; static ssize_t nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); rtnl_lock(); udp_tunnel_nic_reset_ntf(dev); rtnl_unlock(); return count; } static const struct file_operations nsim_udp_tunnels_info_reset_fops = { .open = simple_open, .write = nsim_udp_tunnels_info_reset_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_nic_info *info; if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { dev_err(&nsim_dev->nsim_bus_dev->dev, "shared can't be used in conjunction with open_only\n"); return -EINVAL; } if (!nsim_dev->udp_ports.shared) ns->udp_ports.ports = ns->udp_ports.__ports; else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; debugfs_create_u32("udp_ports_inject_error", 0600, ns->nsim_dev_port->ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("udp_ports_table0", 0400, ns->nsim_dev_port->ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("udp_ports_table1", 0400, ns->nsim_dev_port->ddir, &ns->udp_ports.dfs_ports[1]); debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! * Drivers are expected to use a static const one, here we're testing. */ info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), GFP_KERNEL); if (!info) return -ENOMEM; ns->udp_ports.sleep = nsim_dev->udp_ports.sleep; if (nsim_dev->udp_ports.sync_all) { info->set_port = NULL; info->unset_port = NULL; } else { info->sync_table = NULL; } if (ns->udp_ports.sleep) info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP; if (nsim_dev->udp_ports.open_only) info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; if (nsim_dev->udp_ports.shared) info->shared = &nsim_dev->udp_ports.utn_shared; if (nsim_dev->udp_ports.static_iana_vxlan) info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; dev->udp_tunnel_nic_info = info; return 0; } void nsim_udp_tunnels_info_destroy(struct net_device *dev) { kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) { debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sync_all); debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.open_only); debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.ipv4_only); debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.shared); debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.static_iana_vxlan); debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sleep); }
14040 14040 14143 1746 2411 25 1664 62 2406 25 1665 2406 62 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM net #if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_NET_H #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/tracepoint.h> TRACE_EVENT(net_dev_start_xmit, TP_PROTO(const struct sk_buff *skb, const struct net_device *dev), TP_ARGS(skb, dev), TP_STRUCT__entry( __string( name, dev->name ) __field( u16, queue_mapping ) __field( const void *, skbaddr ) __field( bool, vlan_tagged ) __field( u16, vlan_proto ) __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( int, network_offset ) __field( bool, transport_offset_valid) __field( int, transport_offset) __field( u8, tx_flags ) __field( u16, gso_size ) __field( u16, gso_segs ) __field( u16, gso_type ) ), TP_fast_assign( __assign_str(name); __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->network_offset = skb_network_offset(skb); __entry->transport_offset_valid = skb_transport_header_was_set(skb); __entry->transport_offset = skb_transport_header_was_set(skb) ? skb_transport_offset(skb) : 0; __entry->tx_flags = skb_shinfo(skb)->tx_flags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_segs = skb_shinfo(skb)->gso_segs; __entry->gso_type = skb_shinfo(skb)->gso_type; ), TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, __entry->data_len, __entry->network_offset, __entry->transport_offset_valid, __entry->transport_offset, __entry->tx_flags, __entry->gso_size, __entry->gso_segs, __entry->gso_type) ); TRACE_EVENT(net_dev_xmit, TP_PROTO(struct sk_buff *skb, int rc, struct net_device *dev, unsigned int skb_len), TP_ARGS(skb, rc, dev, skb_len), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __field( int, rc ) __string( name, dev->name ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->len = skb_len; __entry->rc = rc; __assign_str(name); ), TP_printk("dev=%s skbaddr=%p len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); TRACE_EVENT(net_dev_xmit_timeout, TP_PROTO(struct net_device *dev, int queue_index), TP_ARGS(dev, queue_index), TP_STRUCT__entry( __string( name, dev->name ) __string( driver, netdev_drivername(dev)) __field( int, queue_index ) ), TP_fast_assign( __assign_str(name); __assign_str(driver); __entry->queue_index = queue_index; ), TP_printk("dev=%s driver=%s queue=%d", __get_str(name), __get_str(driver), __entry->queue_index) ); DECLARE_EVENT_CLASS(net_dev_template, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __string( name, skb->dev->name ) ), TP_fast_assign( __entry->skbaddr = skb; __entry->len = skb->len; __assign_str(name); ), TP_printk("dev=%s skbaddr=%p len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) DEFINE_EVENT(net_dev_template, net_dev_queue, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_template, netif_receive_skb, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_template, netif_rx, TP_PROTO(struct sk_buff *skb), TP_ARGS(skb) ); DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb), TP_STRUCT__entry( __string( name, skb->dev->name ) __field( unsigned int, napi_id ) __field( u16, queue_mapping ) __field( const void *, skbaddr ) __field( bool, vlan_tagged ) __field( u16, vlan_proto ) __field( u16, vlan_tci ) __field( u16, protocol ) __field( u8, ip_summed ) __field( u32, hash ) __field( bool, l4_hash ) __field( unsigned int, len ) __field( unsigned int, data_len ) __field( unsigned int, truesize ) __field( bool, mac_header_valid) __field( int, mac_header ) __field( unsigned char, nr_frags ) __field( u16, gso_size ) __field( u16, gso_type ) ), TP_fast_assign( __assign_str(name); #ifdef CONFIG_NET_RX_BUSY_POLL __entry->napi_id = skb->napi_id; #else __entry->napi_id = 0; #endif __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->hash = skb->hash; __entry->l4_hash = skb->l4_hash; __entry->len = skb->len; __entry->data_len = skb->data_len; __entry->truesize = skb->truesize; __entry->mac_header_valid = skb_mac_header_was_set(skb); __entry->mac_header = skb_mac_header(skb) - skb->data; __entry->nr_frags = skb_shinfo(skb)->nr_frags; __entry->gso_size = skb_shinfo(skb)->gso_size; __entry->gso_type = skb_shinfo(skb)->gso_type; ), TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->hash, __entry->l4_hash, __entry->len, __entry->data_len, __entry->truesize, __entry->mac_header_valid, __entry->mac_header, __entry->nr_frags, __entry->gso_size, __entry->gso_type) ); DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_frags_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_receive_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry, TP_PROTO(const struct sk_buff *skb), TP_ARGS(skb) ); DECLARE_EVENT_CLASS(net_dev_rx_exit_template, TP_PROTO(int ret), TP_ARGS(ret), TP_STRUCT__entry( __field(int, ret) ), TP_fast_assign( __entry->ret = ret; ), TP_printk("ret=%d", __entry->ret) ); DEFINE_EVENT(net_dev_rx_exit_template, napi_gro_frags_exit, TP_PROTO(int ret), TP_ARGS(ret) ); DEFINE_EVENT(net_dev_rx_exit_template, napi_gro_receive_exit, TP_PROTO(int ret), TP_ARGS(ret) ); DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_exit, TP_PROTO(int ret), TP_ARGS(ret) ); DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_exit, TP_PROTO(int ret), TP_ARGS(ret) ); DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_list_exit, TP_PROTO(int ret), TP_ARGS(ret) ); #endif /* _TRACE_NET_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 // SPDX-License-Identifier: GPL-2.0 /* * GNSS receiver core * * Copyright (C) 2018 Johan Hovold <johan@kernel.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cdev.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/gnss.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/wait.h> #define GNSS_FLAG_HAS_WRITE_RAW BIT(0) #define GNSS_MINORS 16 static DEFINE_IDA(gnss_minors); static dev_t gnss_first; /* FIFO size must be a power of two */ #define GNSS_READ_FIFO_SIZE 4096 #define GNSS_WRITE_BUF_SIZE 1024 #define to_gnss_device(d) container_of((d), struct gnss_device, dev) static int gnss_open(struct inode *inode, struct file *file) { struct gnss_device *gdev; int ret = 0; gdev = container_of(inode->i_cdev, struct gnss_device, cdev); get_device(&gdev->dev); stream_open(inode, file); file->private_data = gdev; down_write(&gdev->rwsem); if (gdev->disconnected) { ret = -ENODEV; goto unlock; } if (gdev->count++ == 0) { ret = gdev->ops->open(gdev); if (ret) gdev->count--; } unlock: up_write(&gdev->rwsem); if (ret) put_device(&gdev->dev); return ret; } static int gnss_release(struct inode *inode, struct file *file) { struct gnss_device *gdev = file->private_data; down_write(&gdev->rwsem); if (gdev->disconnected) goto unlock; if (--gdev->count == 0) { gdev->ops->close(gdev); kfifo_reset(&gdev->read_fifo); } unlock: up_write(&gdev->rwsem); put_device(&gdev->dev); return 0; } static ssize_t gnss_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct gnss_device *gdev = file->private_data; unsigned int copied; int ret; mutex_lock(&gdev->read_mutex); while (kfifo_is_empty(&gdev->read_fifo)) { mutex_unlock(&gdev->read_mutex); if (gdev->disconnected) return 0; if (file->f_flags & O_NONBLOCK) return -EAGAIN; ret = wait_event_interruptible(gdev->read_queue, gdev->disconnected || !kfifo_is_empty(&gdev->read_fifo)); if (ret) return -ERESTARTSYS; mutex_lock(&gdev->read_mutex); } ret = kfifo_to_user(&gdev->read_fifo, buf, count, &copied); if (ret == 0) ret = copied; mutex_unlock(&gdev->read_mutex); return ret; } static ssize_t gnss_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { struct gnss_device *gdev = file->private_data; size_t written = 0; int ret; if (gdev->disconnected) return -EIO; if (!count) return 0; if (!(gdev->flags & GNSS_FLAG_HAS_WRITE_RAW)) return -EIO; /* Ignoring O_NONBLOCK, write_raw() is synchronous. */ ret = mutex_lock_interruptible(&gdev->write_mutex); if (ret) return -ERESTARTSYS; for (;;) { size_t n = count - written; if (n > GNSS_WRITE_BUF_SIZE) n = GNSS_WRITE_BUF_SIZE; if (copy_from_user(gdev->write_buf, buf, n)) { ret = -EFAULT; goto out_unlock; } /* * Assumes write_raw can always accept GNSS_WRITE_BUF_SIZE * bytes. * * FIXME: revisit */ down_read(&gdev->rwsem); if (!gdev->disconnected) ret = gdev->ops->write_raw(gdev, gdev->write_buf, n); else ret = -EIO; up_read(&gdev->rwsem); if (ret < 0) break; written += ret; buf += ret; if (written == count) break; } if (written) ret = written; out_unlock: mutex_unlock(&gdev->write_mutex); return ret; } static __poll_t gnss_poll(struct file *file, poll_table *wait) { struct gnss_device *gdev = file->private_data; __poll_t mask = 0; poll_wait(file, &gdev->read_queue, wait); if (!kfifo_is_empty(&gdev->read_fifo)) mask |= EPOLLIN | EPOLLRDNORM; if (gdev->disconnected) mask |= EPOLLHUP; return mask; } static const struct file_operations gnss_fops = { .owner = THIS_MODULE, .open = gnss_open, .release = gnss_release, .read = gnss_read, .write = gnss_write, .poll = gnss_poll, .llseek = no_llseek, }; static struct class *gnss_class; static void gnss_device_release(struct device *dev) { struct gnss_device *gdev = to_gnss_device(dev); kfree(gdev->write_buf); kfifo_free(&gdev->read_fifo); ida_free(&gnss_minors, gdev->id); kfree(gdev); } struct gnss_device *gnss_allocate_device(struct device *parent) { struct gnss_device *gdev; struct device *dev; int id; int ret; gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); if (!gdev) return NULL; id = ida_alloc_max(&gnss_minors, GNSS_MINORS - 1, GFP_KERNEL); if (id < 0) { kfree(gdev); return NULL; } gdev->id = id; dev = &gdev->dev; device_initialize(dev); dev->devt = gnss_first + id; dev->class = gnss_class; dev->parent = parent; dev->release = gnss_device_release; dev_set_drvdata(dev, gdev); dev_set_name(dev, "gnss%d", id); init_rwsem(&gdev->rwsem); mutex_init(&gdev->read_mutex); mutex_init(&gdev->write_mutex); init_waitqueue_head(&gdev->read_queue); ret = kfifo_alloc(&gdev->read_fifo, GNSS_READ_FIFO_SIZE, GFP_KERNEL); if (ret) goto err_put_device; gdev->write_buf = kzalloc(GNSS_WRITE_BUF_SIZE, GFP_KERNEL); if (!gdev->write_buf) goto err_put_device; cdev_init(&gdev->cdev, &gnss_fops); gdev->cdev.owner = THIS_MODULE; return gdev; err_put_device: put_device(dev); return NULL; } EXPORT_SYMBOL_GPL(gnss_allocate_device); void gnss_put_device(struct gnss_device *gdev) { put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gnss_put_device); int gnss_register_device(struct gnss_device *gdev) { int ret; /* Set a flag which can be accessed without holding the rwsem. */ if (gdev->ops->write_raw != NULL) gdev->flags |= GNSS_FLAG_HAS_WRITE_RAW; ret = cdev_device_add(&gdev->cdev, &gdev->dev); if (ret) { dev_err(&gdev->dev, "failed to add device: %d\n", ret); return ret; } return 0; } EXPORT_SYMBOL_GPL(gnss_register_device); void gnss_deregister_device(struct gnss_device *gdev) { down_write(&gdev->rwsem); gdev->disconnected = true; if (gdev->count) { wake_up_interruptible(&gdev->read_queue); gdev->ops->close(gdev); } up_write(&gdev->rwsem); cdev_device_del(&gdev->cdev, &gdev->dev); } EXPORT_SYMBOL_GPL(gnss_deregister_device); /* * Caller guarantees serialisation. * * Must not be called for a closed device. */ int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, size_t count) { int ret; ret = kfifo_in(&gdev->read_fifo, buf, count); wake_up_interruptible(&gdev->read_queue); return ret; } EXPORT_SYMBOL_GPL(gnss_insert_raw); static const char * const gnss_type_names[GNSS_TYPE_COUNT] = { [GNSS_TYPE_NMEA] = "NMEA", [GNSS_TYPE_SIRF] = "SiRF", [GNSS_TYPE_UBX] = "UBX", [GNSS_TYPE_MTK] = "MTK", }; static const char *gnss_type_name(const struct gnss_device *gdev) { const char *name = NULL; if (gdev->type < GNSS_TYPE_COUNT) name = gnss_type_names[gdev->type]; if (!name) dev_WARN(&gdev->dev, "type name not defined\n"); return name; } static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gnss_device *gdev = to_gnss_device(dev); return sprintf(buf, "%s\n", gnss_type_name(gdev)); } static DEVICE_ATTR_RO(type); static struct attribute *gnss_attrs[] = { &dev_attr_type.attr, NULL, }; ATTRIBUTE_GROUPS(gnss); static int gnss_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct gnss_device *gdev = to_gnss_device(dev); int ret; ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev)); if (ret) return ret; return 0; } static int __init gnss_module_init(void) { int ret; ret = alloc_chrdev_region(&gnss_first, 0, GNSS_MINORS, "gnss"); if (ret < 0) { pr_err("failed to allocate device numbers: %d\n", ret); return ret; } gnss_class = class_create("gnss"); if (IS_ERR(gnss_class)) { ret = PTR_ERR(gnss_class); pr_err("failed to create class: %d\n", ret); goto err_unregister_chrdev; } gnss_class->dev_groups = gnss_groups; gnss_class->dev_uevent = gnss_uevent; pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first)); return 0; err_unregister_chrdev: unregister_chrdev_region(gnss_first, GNSS_MINORS); return ret; } module_init(gnss_module_init); static void __exit gnss_module_exit(void) { class_destroy(gnss_class); unregister_chrdev_region(gnss_first, GNSS_MINORS); ida_destroy(&gnss_minors); } module_exit(gnss_module_exit); MODULE_AUTHOR("Johan Hovold <johan@kernel.org>"); MODULE_DESCRIPTION("GNSS receiver core"); MODULE_LICENSE("GPL v2");
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some sunplus "special" devices * * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize >= 112 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { hid_info(hdev, "fixing up Sunplus Wireless Desktop report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; rdesc[106] = rdesc[111] = 0x21; } return rdesc; } #define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int sp_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; switch (usage->hid & HID_USAGE) { case 0x2003: sp_map_key_clear(KEY_ZOOMIN); break; case 0x2103: sp_map_key_clear(KEY_ZOOMOUT); break; default: return 0; } return 1; } static const struct hid_device_id sp_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) }, { } }; MODULE_DEVICE_TABLE(hid, sp_devices); static struct hid_driver sp_driver = { .name = "sunplus", .id_table = sp_devices, .report_fixup = sp_report_fixup, .input_mapping = sp_input_mapping, }; module_hid_driver(sp_driver); MODULE_DESCRIPTION("HID driver for some sunplus \"special\" devices"); MODULE_LICENSE("GPL");
257 1818 370 176 66 4 4 15 2 2 2 155 13 9 127 129 130 1 46 46 3 2 1 1 82 28 325 121 27 3 4 2 1 220 112 11 344 115 265 3 3 6 3 125 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #ifndef _BR_PRIVATE_H #define _BR_PRIVATE_H #include <linux/netdevice.h> #include <linux/if_bridge.h> #include <linux/netpoll.h> #include <linux/u64_stats_sync.h> #include <net/route.h> #include <net/ip6_fib.h> #include <net/pkt_cls.h> #include <linux/if_vlan.h> #include <linux/rhashtable.h> #include <linux/refcount.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) #define BR_HOLD_TIME (1*HZ) #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) #define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN #define BR_HWDOM_MAX BITS_PER_LONG #define BR_VERSION "2.3" /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ enum { BR_GROUPFWD_STP = BIT(0), BR_GROUPFWD_MACPAUSE = BIT(1), BR_GROUPFWD_LACP = BIT(2), }; #define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \ BR_GROUPFWD_LACP) /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" #define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT) typedef struct bridge_id bridge_id; typedef struct mac_addr mac_addr; typedef __u16 port_id; struct bridge_id { unsigned char prio[2]; unsigned char addr[ETH_ALEN]; }; struct mac_addr { unsigned char addr[ETH_ALEN]; }; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING /* our own querier */ struct bridge_mcast_own_query { struct timer_list timer; u32 startup_sent; }; /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; struct timer_list delay_timer; }; /* selected querier */ struct bridge_mcast_querier { struct br_ip addr; int port_ifidx; seqcount_spinlock_t seq; }; /* IGMP/MLD statistics */ struct bridge_mcast_stats { struct br_mcast_stats mstats; struct u64_stats_sync syncp; }; struct br_mdb_src_entry { struct br_ip addr; }; struct br_mdb_config { struct net_bridge *br; struct net_bridge_port *p; struct br_mdb_entry *entry; struct br_ip group; bool src_entry; u8 filter_mode; u16 nlflags; struct br_mdb_src_entry *src_entries; int num_src_entries; u8 rt_protocol; }; #endif /* net_bridge_mcast_port must be always defined due to forwarding stubs */ struct net_bridge_mcast_port { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct net_bridge_port *port; struct net_bridge_vlan *vlan; struct bridge_mcast_own_query ip4_own_query; struct timer_list ip4_mc_router_timer; struct hlist_node ip4_rlist; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; struct timer_list ip6_mc_router_timer; struct hlist_node ip6_rlist; #endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; u32 mdb_n_entries; u32 mdb_max_entries; #endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ }; /* net_bridge_mcast must be always defined due to forwarding stubs */ struct net_bridge_mcast { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct net_bridge *br; struct net_bridge_vlan *vlan; u32 multicast_last_member_count; u32 multicast_startup_query_count; u8 multicast_querier; u8 multicast_igmp_version; u8 multicast_router; #if IS_ENABLED(CONFIG_IPV6) u8 multicast_mld_version; #endif unsigned long multicast_last_member_interval; unsigned long multicast_membership_interval; unsigned long multicast_querier_interval; unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; struct hlist_head ip4_mc_router_list; struct timer_list ip4_mc_router_timer; struct bridge_mcast_other_query ip4_other_query; struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; #if IS_ENABLED(CONFIG_IPV6) struct hlist_head ip6_mc_router_list; struct timer_list ip6_mc_router_timer; struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ }; struct br_tunnel_info { __be64 tunnel_id; struct metadata_dst __rcu *tunnel_dst; }; /* private vlan flags */ enum { BR_VLFLAG_PER_PORT_STATS = BIT(0), BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1), BR_VLFLAG_MCAST_ENABLED = BIT(2), BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3), BR_VLFLAG_NEIGH_SUPPRESS_ENABLED = BIT(4), }; /** * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member * @tnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags * @priv_flags: private (in-kernel) bridge vlan flags * @state: STP state (e.g. blocking, learning, forwarding) * @stats: per-cpu VLAN statistics * @br: if MASTER flag set, this points to a bridge struct * @port: if MASTER flag unset, this points to a port struct * @refcnt: if MASTER flag set, this is bumped for each port referencing it * @brvlan: if MASTER flag unset, this points to the global per-VLAN context * for this VLAN entry * @tinfo: bridge tunnel info * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast * context * @msti: if MASTER flag set, this holds the VLANs MST instance * @vlist: sorted list of VLAN entries * @rcu: used for entry destruction * * This structure is shared between the global per-VLAN entries contained in * the bridge rhashtable and the local per-port per-VLAN entries contained in * the port's rhashtable. The union entries should be interpreted depending on * the entry flags that are set. */ struct net_bridge_vlan { struct rhash_head vnode; struct rhash_head tnode; u16 vid; u16 flags; u16 priv_flags; u8 state; struct pcpu_sw_netstats __percpu *stats; union { struct net_bridge *br; struct net_bridge_port *port; }; union { refcount_t refcnt; struct net_bridge_vlan *brvlan; }; struct br_tunnel_info tinfo; union { struct net_bridge_mcast br_mcast_ctx; struct net_bridge_mcast_port port_mcast_ctx; }; u16 msti; struct list_head vlist; struct rcu_head rcu; }; /** * struct net_bridge_vlan_group * * @vlan_hash: VLAN entry rhashtable * @vlan_list: sorted VLAN entry list * @num_vlans: number of total VLAN entries * @pvid: PVID VLAN id * @pvid_state: PVID's STP state (e.g. forwarding, learning, blocking) * * IMPORTANT: Be careful when checking if there're VLAN entries using list * primitives because the bridge can have entries in its list which * are just for global context but not for filtering, i.e. they have * the master flag set but not the brentry flag. If you have to check * if there're "real" entries in the bridge please test @num_vlans */ struct net_bridge_vlan_group { struct rhashtable vlan_hash; struct rhashtable tunnel_hash; struct list_head vlan_list; u16 num_vlans; u16 pvid; u8 pvid_state; }; /* bridge fdb flags */ enum { BR_FDB_LOCAL, BR_FDB_STATIC, BR_FDB_STICKY, BR_FDB_ADDED_BY_USER, BR_FDB_ADDED_BY_EXT_LEARN, BR_FDB_OFFLOADED, BR_FDB_NOTIFY, BR_FDB_NOTIFY_INACTIVE, BR_FDB_LOCKED, BR_FDB_DYNAMIC_LEARNED, }; struct net_bridge_fdb_key { mac_addr addr; u16 vlan_id; }; struct net_bridge_fdb_entry { struct rhash_head rhnode; struct net_bridge_port *dst; struct net_bridge_fdb_key key; struct hlist_node fdb_node; unsigned long flags; /* write-heavy members should not affect lookups */ unsigned long updated ____cacheline_aligned_in_smp; unsigned long used; struct rcu_head rcu; }; struct net_bridge_fdb_flush_desc { unsigned long flags; unsigned long flags_mask; int port_ifindex; u16 vlan_id; }; #define MDB_PG_FLAGS_PERMANENT BIT(0) #define MDB_PG_FLAGS_OFFLOAD BIT(1) #define MDB_PG_FLAGS_FAST_LEAVE BIT(2) #define MDB_PG_FLAGS_STAR_EXCL BIT(3) #define MDB_PG_FLAGS_BLOCKED BIT(4) #define PG_SRC_ENT_LIMIT 32 #define BR_SGRP_F_DELETE BIT(0) #define BR_SGRP_F_SEND BIT(1) #define BR_SGRP_F_INSTALLED BIT(2) #define BR_SGRP_F_USER_ADDED BIT(3) struct net_bridge_mcast_gc { struct hlist_node gc_node; void (*destroy)(struct net_bridge_mcast_gc *gc); }; struct net_bridge_group_src { struct hlist_node node; struct br_ip addr; struct net_bridge_port_group *pg; u8 flags; u8 src_query_rexmit_cnt; struct timer_list timer; struct net_bridge *br; struct net_bridge_mcast_gc mcast_gc; struct rcu_head rcu; }; struct net_bridge_port_group_sg_key { struct net_bridge_port *port; struct br_ip addr; }; struct net_bridge_port_group { struct net_bridge_port_group __rcu *next; struct net_bridge_port_group_sg_key key; unsigned char eth_addr[ETH_ALEN] __aligned(2); unsigned char flags; unsigned char filter_mode; unsigned char grp_query_rexmit_cnt; unsigned char rt_protocol; struct hlist_head src_list; unsigned int src_ents; struct timer_list timer; struct timer_list rexmit_timer; struct hlist_node mglist; struct rb_root eht_set_tree; struct rb_root eht_host_tree; struct rhash_head rhnode; struct net_bridge_mcast_gc mcast_gc; struct rcu_head rcu; }; struct net_bridge_mdb_entry { struct rhash_head rhnode; struct net_bridge *br; struct net_bridge_port_group __rcu *ports; struct br_ip addr; bool host_joined; struct timer_list timer; struct hlist_node mdb_node; struct net_bridge_mcast_gc mcast_gc; struct rcu_head rcu; }; struct net_bridge_port { struct net_bridge *br; struct net_device *dev; netdevice_tracker dev_tracker; struct list_head list; unsigned long flags; #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif struct net_bridge_port __rcu *backup_port; u32 backup_nhid; /* STP */ u8 priority; u8 state; u16 port_no; unsigned char topology_change_ack; unsigned char config_pending; port_id port_id; port_id designated_port; bridge_id designated_root; bridge_id designated_bridge; u32 path_cost; u32 designated_cost; unsigned long designated_age; struct timer_list forward_delay_timer; struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; struct rcu_head rcu; struct net_bridge_mcast_port multicast_ctx; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_stats __percpu *mcast_stats; u32 multicast_eht_hosts_limit; u32 multicast_eht_hosts_cnt; struct hlist_head mglist; #endif #ifdef CONFIG_SYSFS char sysfs_name[IFNAMSIZ]; #endif #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif #ifdef CONFIG_NET_SWITCHDEV /* Identifier used to group ports that share the same switchdev * hardware domain. */ int hwdom; int offload_count; struct netdev_phys_item_id ppid; #endif u16 group_fwd_mask; u16 backup_redirected_cnt; struct bridge_stp_xstats stp_xstats; }; #define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj) #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) #define br_promisc_port(p) ((p)->flags & BR_PROMISC) static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev) { return netif_is_bridge_port(dev) ? rtnl_dereference(dev->rx_handler_data) : NULL; } static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_device *dev) { return netif_is_bridge_port(dev) ? rcu_dereference_rtnl(dev->rx_handler_data) : NULL; } enum net_bridge_opts { BROPT_VLAN_ENABLED, BROPT_VLAN_STATS_ENABLED, BROPT_NF_CALL_IPTABLES, BROPT_NF_CALL_IP6TABLES, BROPT_NF_CALL_ARPTABLES, BROPT_GROUP_ADDR_SET, BROPT_MULTICAST_ENABLED, BROPT_MULTICAST_QUERY_USE_IFADDR, BROPT_MULTICAST_STATS_ENABLED, BROPT_HAS_IPV6_ADDR, BROPT_NEIGH_SUPPRESS_ENABLED, BROPT_MTU_SET_BY_USER, BROPT_VLAN_STATS_PER_PORT, BROPT_NO_LL_LEARN, BROPT_VLAN_BRIDGE_BINDING, BROPT_MCAST_VLAN_SNOOPING_ENABLED, BROPT_MST_ENABLED, }; struct net_bridge { spinlock_t lock; spinlock_t hash_lock; struct hlist_head frame_type_list; struct net_device *dev; unsigned long options; /* These fields are accessed on each packet */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING __be16 vlan_proto; u16 default_pvid; struct net_bridge_vlan_group __rcu *vlgrp; #endif struct rhashtable fdb_hash_tbl; struct list_head port_list; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { struct rtable fake_rtable; struct rt6_info fake_rt6_info; }; #endif u16 group_fwd_mask; u16 group_fwd_mask_required; /* STP */ bridge_id designated_root; bridge_id bridge_id; unsigned char topology_change; unsigned char topology_change_detected; u16 root_port; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; unsigned long ageing_time; unsigned long bridge_max_age; unsigned long bridge_hello_time; unsigned long bridge_forward_delay; unsigned long bridge_ageing_time; u32 root_path_cost; u8 group_addr[ETH_ALEN]; enum { BR_NO_STP, /* no spanning tree */ BR_KERNEL_STP, /* old STP in kernel */ BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; struct net_bridge_mcast multicast_ctx; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_stats __percpu *mcast_stats; u32 hash_max; spinlock_t multicast_lock; struct rhashtable mdb_hash_tbl; struct rhashtable sg_port_tbl; struct hlist_head mcast_gc_list; struct hlist_head mdb_list; struct work_struct mcast_gc_work; #endif struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; struct delayed_work gc_work; struct kobject *ifobj; u32 auto_cnt; atomic_t fdb_n_learned; u32 fdb_max_learned; #ifdef CONFIG_NET_SWITCHDEV /* Counter used to make sure that hardware domains get unique * identifiers in case a bridge spans multiple switchdev instances. */ int last_hwdom; /* Bit mask of hardware domain numbers in use */ unsigned long busy_hwdoms; #endif struct hlist_head fdb_list; #if IS_ENABLED(CONFIG_BRIDGE_MRP) struct hlist_head mrp_list; #endif #if IS_ENABLED(CONFIG_BRIDGE_CFM) struct hlist_head mep_list; #endif }; struct br_input_skb_cb { struct net_device *brdev; u16 frag_max_size; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u8 igmp; u8 mrouters_only:1; #endif u8 proxyarp_replied:1; u8 src_port_isolated:1; u8 promisc:1; #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_filtered:1; #endif #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE u8 br_netfilter_broute:1; #endif #ifdef CONFIG_NET_SWITCHDEV /* Set if TX data plane offloading is used towards at least one * hardware domain. */ u8 tx_fwd_offload:1; /* The switchdev hardware domain from which this packet was received. * If skb->offload_fwd_mark was set, then this packet was already * forwarded by hardware to the other ports in the source hardware * domain, otherwise it wasn't. */ int src_hwdom; /* Bit mask of hardware domains towards this packet has already been * transmitted using the TX data plane offload. */ unsigned long fwd_hwdoms; #endif u32 backup_nhid; }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) #ifdef CONFIG_BRIDGE_IGMP_SNOOPING # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) #else # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) #endif #define br_printk(level, br, format, args...) \ printk(level "%s: " format, (br)->dev->name, ##args) #define br_err(__br, format, args...) \ br_printk(KERN_ERR, __br, format, ##args) #define br_warn(__br, format, args...) \ br_printk(KERN_WARNING, __br, format, ##args) #define br_notice(__br, format, args...) \ br_printk(KERN_NOTICE, __br, format, ##args) #define br_info(__br, format, args...) \ br_printk(KERN_INFO, __br, format, ##args) #define br_debug(br, format, args...) \ pr_debug("%s: " format, (br)->dev->name, ##args) /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) { return !memcmp(&br->bridge_id, &br->designated_root, 8); } /* check if a VLAN entry is global */ static inline bool br_vlan_is_master(const struct net_bridge_vlan *v) { return v->flags & BRIDGE_VLAN_INFO_MASTER; } /* check if a VLAN entry is used by the bridge */ static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v) { return v->flags & BRIDGE_VLAN_INFO_BRENTRY; } /* check if we should use the vlan entry, returns false if it's only context */ static inline bool br_vlan_should_use(const struct net_bridge_vlan *v) { if (br_vlan_is_master(v)) { if (br_vlan_is_brentry(v)) return true; else return false; } return true; } static inline bool nbp_state_should_learn(const struct net_bridge_port *p) { return p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING; } static inline bool br_vlan_valid_id(u16 vid, struct netlink_ext_ack *extack) { bool ret = vid > 0 && vid < VLAN_VID_MASK; if (!ret) NL_SET_ERR_MSG_MOD(extack, "Vlan id is invalid"); return ret; } static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur, const struct bridge_vlan_info *last, struct netlink_ext_ack *extack) { /* pvid flag is not allowed in ranges */ if (cur->flags & BRIDGE_VLAN_INFO_PVID) { NL_SET_ERR_MSG_MOD(extack, "Pvid isn't allowed in a range"); return false; } /* when cur is the range end, check if: * - it has range start flag * - range ids are invalid (end is equal to or before start) */ if (last) { if (cur->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { NL_SET_ERR_MSG_MOD(extack, "Found a new vlan range start while processing one"); return false; } else if (!(cur->flags & BRIDGE_VLAN_INFO_RANGE_END)) { NL_SET_ERR_MSG_MOD(extack, "Vlan range end flag is missing"); return false; } else if (cur->vid <= last->vid) { NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id"); return false; } } /* check for required range flags */ if (!(cur->flags & (BRIDGE_VLAN_INFO_RANGE_BEGIN | BRIDGE_VLAN_INFO_RANGE_END))) { NL_SET_ERR_MSG_MOD(extack, "Both vlan range flags are missing"); return false; } return true; } static inline u8 br_vlan_multicast_router(const struct net_bridge_vlan *v) { u8 mcast_router = MDB_RTR_TYPE_DISABLED; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (!br_vlan_is_master(v)) mcast_router = v->port_mcast_ctx.multicast_router; else mcast_router = v->br_mcast_ctx.multicast_router; #endif return mcast_router; } static inline int br_afspec_cmd_to_rtm(int cmd) { switch (cmd) { case RTM_SETLINK: return RTM_NEWVLAN; case RTM_DELLINK: return RTM_DELVLAN; } return 0; } static inline int br_opt_get(const struct net_bridge *br, enum net_bridge_opts opt) { return test_bit(opt, &br->options); } int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, struct netlink_ext_ack *extack); int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt); int br_boolopt_multi_toggle(struct net_bridge *br, struct br_boolopt_multi *bm, struct netlink_ext_ack *extack); void br_boolopt_multi_get(const struct net_bridge *br, struct br_boolopt_multi *bm); void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on); #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss) { struct tc_skb_ext *ext; if (!tc_skb_ext_tc_enabled()) return; ext = skb_ext_find(skb, TC_SKB_EXT); if (ext) { ext->l2_miss = miss; return; } if (!miss) return; ext = tc_skb_ext_alloc(skb); if (!ext) return; ext->l2_miss = true; } #else static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss) { } #endif /* br_device.c */ void br_dev_setup(struct net_device *dev); void br_dev_delete(struct net_device *dev, struct list_head *list); netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER static inline void br_netpoll_send_skb(const struct net_bridge_port *p, struct sk_buff *skb) { netpoll_send_skb(p->np, skb); } int br_netpoll_enable(struct net_bridge_port *p); void br_netpoll_disable(struct net_bridge_port *p); #else static inline void br_netpoll_send_skb(const struct net_bridge_port *p, struct sk_buff *skb) { } static inline int br_netpoll_enable(struct net_bridge_port *p) { return 0; } static inline void br_netpoll_disable(struct net_bridge_port *p) { } #endif /* br_fdb.c */ #define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF) #define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP) #define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \ NTF_STICKY | NTF_OFFLOADED) int br_fdb_init(void); void br_fdb_fini(void); int br_fdb_hash_init(struct net_bridge *br); void br_fdb_hash_fini(struct net_bridge *br); void br_fdb_flush(struct net_bridge *br, const struct net_bridge_fdb_flush_desc *desc); void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, const unsigned char *addr, __u16 vid); int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid, unsigned long flags); int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool locked, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool offloaded); /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, BR_PKT_MULTICAST, BR_PKT_BROADCAST }; int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig, u16 vid); /* return true if both source port and dest port are isolated */ static inline bool br_skb_isolated(const struct net_bridge_port *to, const struct sk_buff *skb) { return BR_INPUT_SKB_CB(skb)->src_port_isolated && (to->flags & BR_ISOLATED); } /* br_if.c */ void br_port_carrier_check(struct net_bridge_port *p, bool *notified); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); int br_add_if(struct net_bridge *br, struct net_device *dev, struct netlink_ext_ack *extack); int br_del_if(struct net_bridge *br, struct net_device *dev); void br_mtu_auto_adjust(struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, netdev_features_t features); void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev); /* br_input.c */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); rx_handler_func_t *br_get_rx_handler(const struct net_device *dev); struct br_frame_type { __be16 type; int (*frame_handler)(struct net_bridge_port *port, struct sk_buff *skb); struct hlist_node list; }; void br_add_frame(struct net_bridge *br, struct br_frame_type *ft); void br_del_frame(struct net_bridge *br, struct br_frame_type *ft); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev); } static inline bool br_rx_handler_check_rtnl(const struct net_device *dev) { return rcu_dereference_rtnl(dev->rx_handler) == br_get_rx_handler(dev); } static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) { return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; } static inline struct net_bridge_port * br_port_get_check_rtnl(const struct net_device *dev) { return br_rx_handler_check_rtnl(dev) ? br_port_get_rtnl_rcu(dev) : NULL; } /* br_ioctl.c */ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd); int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, struct ifreq *ifr, void __user *uarg); /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid); struct net_bridge_mdb_entry * br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid); int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); void br_multicast_init(struct net_bridge *br); void br_multicast_join_snoopers(struct net_bridge *br); void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig); int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val); int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, unsigned long val); int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router); int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val); int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx, unsigned long val); #if IS_ENABLED(CONFIG_IPV6) int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, unsigned long val); #endif struct net_bridge_mdb_entry * br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst); struct net_bridge_mdb_entry * br_multicast_new_group(struct net_bridge *br, struct br_ip *group); struct net_bridge_port_group * br_multicast_new_port_group(struct net_bridge_port *port, const struct br_ip *group, struct net_bridge_port_group __rcu *next, unsigned char flags, const unsigned char *src, u8 filter_mode, u8 rt_protocol, struct netlink_ext_ack *extack); void br_multicast_del_port_group(struct net_bridge_port_group *p); int br_mdb_hash_init(struct net_bridge *br); void br_mdb_hash_fini(struct net_bridge *br); void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type); void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx, int type); void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, struct net_bridge_port_group __rcu **pp); void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); void br_multicast_uninit_stats(struct net_bridge *br); void br_multicast_get_stats(const struct net_bridge *br, const struct net_bridge_port *p, struct br_mcast_stats *dest); u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx); void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max); u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx); int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack); void br_multicast_host_join(const struct net_bridge_mcast *brmctx, struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode); void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); struct net_bridge_group_src * br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip); void __br_multicast_del_group_src(struct net_bridge_group_src *src); void br_multicast_del_group_src(struct net_bridge_group_src *src, bool fastleave); void br_multicast_ctx_init(struct net_bridge *br, struct net_bridge_vlan *vlan, struct net_bridge_mcast *brmctx); void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx); void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); int br_rports_fill_info(struct sk_buff *skb, const struct net_bridge_mcast *brmctx); int br_multicast_dump_querier_state(struct sk_buff *skb, const struct net_bridge_mcast *brmctx, int nest_attr); size_t br_multicast_querier_state_size(void); size_t br_rports_size(const struct net_bridge_mcast *brmctx); void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); static inline bool br_group_is_l2(const struct br_ip *group) { return group->proto == 0; } #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) static inline struct hlist_node * br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list)); #endif return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list)); } static inline struct net_bridge_port * br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) { struct net_bridge_mcast_port *mctx; #if IS_ENABLED(CONFIG_IPV6) if (skb->protocol == htons(ETH_P_IPV6)) mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, ip6_rlist); else #endif mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port, ip4_rlist); if (mctx) return mctx->port; else return NULL; } static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx) { return timer_pending(&brmctx->ip4_mc_router_timer); } static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) return timer_pending(&brmctx->ip6_mc_router_timer); #else return false; #endif } static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { switch (brmctx->multicast_router) { case MDB_RTR_TYPE_PERM: return true; case MDB_RTR_TYPE_TEMP_QUERY: if (skb) { if (skb->protocol == htons(ETH_P_IP)) return br_ip4_multicast_is_router(brmctx); else if (skb->protocol == htons(ETH_P_IPV6)) return br_ip6_multicast_is_router(brmctx); } else { return br_ip4_multicast_is_router(brmctx) || br_ip6_multicast_is_router(brmctx); } fallthrough; default: return false; } } static inline bool __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct bridge_mcast_other_query *querier, const bool is_ipv6) { bool own_querier_enabled; if (brmctx->multicast_querier) { if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR)) own_querier_enabled = false; else own_querier_enabled = true; } else { own_querier_enabled = false; } return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { switch (eth->h_proto) { case (htons(ETH_P_IP)): return __br_multicast_querier_exists(brmctx, &brmctx->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): return __br_multicast_querier_exists(brmctx, &brmctx->ip6_other_query, true); #endif default: return !!mdb && br_group_is_l2(&mdb->addr); } } static inline bool br_multicast_is_star_g(const struct br_ip *ip) { switch (ip->proto) { case htons(ETH_P_IP): return ipv4_is_zeronet(ip->src.ip4); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return ipv6_addr_any(&ip->src.ip6); #endif default: return false; } } static inline bool br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx, __be16 proto) { switch (proto) { case htons(ETH_P_IP): return !!(brmctx->multicast_igmp_version == 3); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return !!(brmctx->multicast_mld_version == 2); #endif default: return false; } } static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return BR_INPUT_SKB_CB(skb)->igmp; } static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx) { return brmctx->multicast_last_member_interval * brmctx->multicast_last_member_count; } static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx) { return brmctx->multicast_membership_interval; } static inline bool br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx) { return !!brmctx->vlan; } static inline bool br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx) { return !!pmctx->vlan; } static inline struct net_bridge_mcast * br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) { if (!br_multicast_port_ctx_is_vlan(pmctx)) return &pmctx->port->br->multicast_ctx; else return &pmctx->vlan->brvlan->br_mcast_ctx; } static inline bool br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) { return br_multicast_ctx_is_vlan(brmctx) && (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); } static inline bool br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx) { return br_multicast_ctx_is_vlan(brmctx) && !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); } static inline bool br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx) { return br_multicast_port_ctx_is_vlan(pmctx) && !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED); } static inline bool br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx) { return pmctx->port->state == BR_STATE_DISABLED || (br_multicast_port_ctx_is_vlan(pmctx) && (br_multicast_port_ctx_vlan_disabled(pmctx) || pmctx->vlan->state == BR_STATE_DISABLED)); } static inline bool br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx) { return br_multicast_port_ctx_state_disabled(pmctx) || pmctx->port->state == BR_STATE_BLOCKING || (br_multicast_port_ctx_is_vlan(pmctx) && pmctx->vlan->state == BR_STATE_BLOCKING); } static inline bool br_rports_have_mc_router(const struct net_bridge_mcast *brmctx) { #if IS_ENABLED(CONFIG_IPV6) return !hlist_empty(&brmctx->ip4_mc_router_list) || !hlist_empty(&brmctx->ip6_mc_router_list); #else return !hlist_empty(&brmctx->ip4_mc_router_list); #endif } static inline bool br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1, const struct net_bridge_mcast *brmctx2) { return brmctx1->multicast_igmp_version == brmctx2->multicast_igmp_version && brmctx1->multicast_last_member_count == brmctx2->multicast_last_member_count && brmctx1->multicast_startup_query_count == brmctx2->multicast_startup_query_count && brmctx1->multicast_last_member_interval == brmctx2->multicast_last_member_interval && brmctx1->multicast_membership_interval == brmctx2->multicast_membership_interval && brmctx1->multicast_querier_interval == brmctx2->multicast_querier_interval && brmctx1->multicast_query_interval == brmctx2->multicast_query_interval && brmctx1->multicast_query_response_interval == brmctx2->multicast_query_response_interval && brmctx1->multicast_startup_query_interval == brmctx2->multicast_startup_query_interval && brmctx1->multicast_querier == brmctx2->multicast_querier && brmctx1->multicast_router == brmctx2->multicast_router && !br_rports_have_mc_router(brmctx1) && !br_rports_have_mc_router(brmctx2) && #if IS_ENABLED(CONFIG_IPV6) brmctx1->multicast_mld_version == brmctx2->multicast_mld_version && #endif true; } static inline bool br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx) { bool vlan_snooping_enabled; vlan_snooping_enabled = !!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx)); } #else static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx, struct net_bridge_mcast_port **pmctx, struct net_bridge_vlan *vlan, struct sk_buff *skb, u16 vid) { return 0; } static inline struct net_bridge_mdb_entry * br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid) { return NULL; } static inline int br_multicast_add_port(struct net_bridge_port *port) { return 0; } static inline void br_multicast_del_port(struct net_bridge_port *port) { } static inline void br_multicast_enable_port(struct net_bridge_port *port) { } static inline void br_multicast_disable_port(struct net_bridge_port *port) { } static inline void br_multicast_init(struct net_bridge *br) { } static inline void br_multicast_join_snoopers(struct net_bridge *br) { } static inline void br_multicast_leave_snoopers(struct net_bridge *br) { } static inline void br_multicast_open(struct net_bridge *br) { } static inline void br_multicast_stop(struct net_bridge *br) { } static inline void br_multicast_dev_del(struct net_bridge *br) { } static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { } static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb) { return false; } static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx, struct ethhdr *eth, const struct net_bridge_mdb_entry *mdb) { return false; } static inline int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { return 0; } static inline int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_mdb_hash_init(struct net_bridge *br) { return 0; } static inline void br_mdb_hash_fini(struct net_bridge *br) { } static inline void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir) { } static inline int br_multicast_init_stats(struct net_bridge *br) { return 0; } static inline void br_multicast_uninit_stats(struct net_bridge *br) { } static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; } static inline void br_multicast_ctx_init(struct net_bridge *br, struct net_bridge_vlan *vlan, struct net_bridge_mcast *brmctx) { } static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx) { } static inline void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx) { } static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { } static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { } static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on) { return false; } static inline bool br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1, const struct net_bridge_mcast *brmctx2) { return true; } #endif /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid, u8 *state, struct net_bridge_vlan **vlan); bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed, struct netlink_ext_ack *extack); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, struct netlink_ext_ack *extack); int br_vlan_set_proto(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid, struct netlink_ext_ack *extack); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, bool *changed, struct netlink_ext_ack *extack); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack); int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); void br_vlan_get_stats(const struct net_bridge_vlan *v, struct pcpu_sw_netstats *stats); void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr); void br_vlan_rtnl_init(void); void br_vlan_rtnl_uninit(void); void br_vlan_notify(const struct net_bridge *br, const struct net_bridge_port *p, u16 vid, u16 vid_range, int cmd); bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end); void br_vlan_fill_forward_path_pvid(struct net_bridge *br, struct net_device_path_ctx *ctx, struct net_device_path *path); int br_vlan_fill_forward_path_mode(struct net_bridge *br, struct net_bridge_port *dst, struct net_device_path *path); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) { return rtnl_dereference(br->vlgrp); } static inline struct net_bridge_vlan_group *nbp_vlan_group( const struct net_bridge_port *p) { return rtnl_dereference(p->vlgrp); } static inline struct net_bridge_vlan_group *br_vlan_group_rcu( const struct net_bridge *br) { return rcu_dereference(br->vlgrp); } static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( const struct net_bridge_port *p) { return rcu_dereference(p->vlgrp); } /* Since bridge now depends on 8021Q module, but the time bridge sees the * skb, the vlan tag will always be present if the frame was tagged. */ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; if (skb_vlan_tag_present(skb)) { *vid = skb_vlan_tag_get_id(skb); } else { *vid = 0; err = -EINVAL; } return err; } static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { if (!vg) return 0; smp_rmb(); return vg->pvid; } static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) { return v->vid == pvid ? v->flags | BRIDGE_VLAN_INFO_PVID : v->flags; } #else static inline bool br_allowed_ingress(const struct net_bridge *br, struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid, u8 *state, struct net_bridge_vlan **vlan) { *vlan = NULL; return true; } static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb) { return true; } static inline bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) { return true; } static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { return skb; } static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed, struct netlink_ext_ack *extack) { *changed = false; return -EOPNOTSUPP; } static inline int br_vlan_delete(struct net_bridge *br, u16 vid) { return -EOPNOTSUPP; } static inline void br_vlan_flush(struct net_bridge *br) { } static inline void br_recalculate_fwd_mask(struct net_bridge *br) { } static inline int br_vlan_init(struct net_bridge *br) { return 0; } static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, bool *changed, struct netlink_ext_ack *extack) { *changed = false; return -EOPNOTSUPP; } static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { return -EOPNOTSUPP; } static inline void nbp_vlan_flush(struct net_bridge_port *port) { } static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid) { return NULL; } static inline int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack) { return 0; } static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) { return 0; } static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { return 0; } static inline int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask) { return 0; } static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br, struct net_device_path_ctx *ctx, struct net_device_path *path) { } static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br, struct net_bridge_port *dst, struct net_device_path *path) { return 0; } static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) { return NULL; } static inline struct net_bridge_vlan_group *nbp_vlan_group( const struct net_bridge_port *p) { return NULL; } static inline struct net_bridge_vlan_group *br_vlan_group_rcu( const struct net_bridge *br) { return NULL; } static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( const struct net_bridge_port *p) { return NULL; } static inline void br_vlan_get_stats(const struct net_bridge_vlan *v, struct pcpu_sw_netstats *stats) { } static inline void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) { } static inline int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr) { return 0; } static inline void br_vlan_rtnl_init(void) { } static inline void br_vlan_rtnl_uninit(void) { } static inline void br_vlan_notify(const struct net_bridge *br, const struct net_bridge_port *p, u16 vid, u16 vid_range, int cmd) { } static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end) { return true; } static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid) { return 0; } #endif /* br_vlan_options.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end); bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v, const struct net_bridge_port *p); size_t br_vlan_opts_nl_size(void); int br_vlan_process_options(const struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_vlan *range_start, struct net_bridge_vlan *range_end, struct nlattr **tb, struct netlink_ext_ack *extack); int br_vlan_rtm_process_global_options(struct net_device *dev, const struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *r_end); bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts); /* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) { return READ_ONCE(v->state); } static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state) { WRITE_ONCE(v->state, state); } static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg) { return READ_ONCE(vg->pvid_state); } static inline void br_vlan_set_pvid_state(struct net_bridge_vlan_group *vg, u8 state) { WRITE_ONCE(vg->pvid_state, state); } /* learn_allow is true at ingress and false at egress */ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) { switch (state) { case BR_STATE_LEARNING: return learn_allow; case BR_STATE_FORWARDING: return true; default: return false; } } #endif /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); static inline bool br_mst_is_enabled(struct net_bridge *br) { return static_branch_unlikely(&br_mst_used) && br_opt_get(br, BROPT_MST_ENABLED); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, struct netlink_ext_ack *extack); int br_mst_vlan_set_msti(struct net_bridge_vlan *v, u16 msti); void br_mst_vlan_init_state(struct net_bridge_vlan *v); int br_mst_set_enabled(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); size_t br_mst_info_size(const struct net_bridge_vlan_group *vg); int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); #else static inline bool br_mst_is_enabled(struct net_bridge *br) { return false; } static inline int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_mst_set_enabled(struct net_bridge *br, bool on, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline size_t br_mst_info_size(const struct net_bridge_vlan_group *vg) { return 0; } static inline int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg) { return -EOPNOTSUPP; } static inline int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } #endif struct nf_br_ops { int (*br_dev_xmit_hook)(struct sk_buff *skb); }; extern const struct nf_br_ops __rcu *nf_br_ops; /* br_netfilter.c */ #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) int br_nf_core_init(void); void br_nf_core_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else static inline int br_nf_core_init(void) { return 0; } static inline void br_nf_core_fini(void) {} #define br_netfilter_rtable_init(x) #endif /* br_stp.c */ void br_set_state(struct net_bridge_port *p, unsigned int state); struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); void br_init_port(struct net_bridge_port *p); void br_become_designated_port(struct net_bridge_port *p); void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); int __set_ageing_time(struct net_device *dev, unsigned long t); int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time); /* br_stp_if.c */ void br_stp_enable_bridge(struct net_bridge *br); void br_stp_disable_bridge(struct net_bridge *br); int br_stp_set_enabled(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); void br_stp_enable_port(struct net_bridge_port *p); void br_stp_disable_port(struct net_bridge_port *p); bool br_stp_recalculate_bridge_id(struct net_bridge *br); void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio); int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost); ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ struct stp_proto; void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev); /* br_stp_timer.c */ void br_stp_timer_init(struct net_bridge *br); void br_stp_port_timer_init(struct net_bridge_port *p); unsigned long br_timer_value(const struct timer_list *timer); /* br.c */ #if IS_ENABLED(CONFIG_ATM_LANE) extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr); #endif /* br_mrp.c */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); bool br_mrp_enabled(struct net_bridge *br); void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p); int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br); #else static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline bool br_mrp_enabled(struct net_bridge *br) { return false; } static inline void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p) { } static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) { return 0; } #endif /* br_cfm.c */ #if IS_ENABLED(CONFIG_BRIDGE_CFM) int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack); bool br_cfm_created(struct net_bridge *br); void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p); int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br); int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br, bool getlink); int br_cfm_mep_count(struct net_bridge *br, u32 *count); int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count); #else static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline bool br_cfm_created(struct net_bridge *br) { return false; } static inline void br_cfm_port_del(struct net_bridge *br, struct net_bridge_port *p) { } static inline int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br) { return -EOPNOTSUPP; } static inline int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br, bool getlink) { return -EOPNOTSUPP; } static inline int br_cfm_mep_count(struct net_bridge *br, u32 *count) { *count = 0; return -EOPNOTSUPP; } static inline int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count) { *count = 0; return -EOPNOTSUPP; } #endif /* br_netlink.c */ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, const struct net_bridge *br, const struct net_bridge_port *port); void br_info_notify(int event, const struct net_bridge *br, const struct net_bridge_port *port, u32 filter); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags, struct netlink_ext_ack *extack); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags); int br_process_vlan_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct bridge_vlan_info *vinfo_curr, struct bridge_vlan_info **vinfo_last, bool *changed, struct netlink_ext_ack *extack); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ extern const struct sysfs_ops brport_sysfs_ops; int br_sysfs_addif(struct net_bridge_port *p); int br_sysfs_renameif(struct net_bridge_port *p); /* br_sysfs_br.c */ int br_sysfs_addbr(struct net_device *dev); void br_sysfs_delbr(struct net_device *dev); #else static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV int br_switchdev_port_offload(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, bool tx_fwd_offload, struct netlink_ext_ack *extack); void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb); int br_switchdev_port_replay(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, struct netlink_ext_ack *extack); bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, struct sk_buff *skb); void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, struct sk_buff *skb); void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb); bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb); int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, unsigned long mask, struct netlink_ext_ack *extack); void br_switchdev_fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type); void br_switchdev_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type); int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, bool changed, struct netlink_ext_ack *extack); int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); void br_switchdev_init(struct net_bridge *br); static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { skb->offload_fwd_mark = 0; } #else static inline int br_switchdev_port_offload(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, bool tx_fwd_offload, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb) { } static inline int br_switchdev_port_replay(struct net_bridge_port *p, struct net_device *dev, const void *ctx, struct notifier_block *atomic_nb, struct notifier_block *blocking_nb, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { return false; } static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb) { } static inline void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p, struct sk_buff *skb) { } static inline void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p, struct sk_buff *skb) { } static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, struct sk_buff *skb) { } static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, const struct sk_buff *skb) { return true; } static inline int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, unsigned long mask, struct netlink_ext_ack *extack) { return 0; } static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, bool changed, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { return -EOPNOTSUPP; } static inline void br_switchdev_fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type) { } static inline void br_switchdev_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *pg, int type) { } static inline void br_switchdev_frame_unmark(struct sk_buff *skb) { } static inline void br_switchdev_init(struct net_bridge *br) { } #endif /* CONFIG_NET_SWITCHDEV */ /* br_arp_nd_proxy.c */ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, u16 vid, struct net_bridge_port *p); void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, u16 vid, struct net_bridge_port *p, struct nd_msg *msg); struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m); bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid); #endif
251 250 251 250 250 250 251 249 251 47 250 249 251 251 251 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 /* * Generic 1-bit or 8-bit source to 1-32 bit destination expansion * for frame buffer located in system RAM with packed pixels of any depth. * * Based almost entirely on cfbimgblt.c * * Copyright (C) April 2007 Antonino Daplas <adaplas@pol.net> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. */ #include <linux/module.h> #include <linux/string.h> #include <linux/fb.h> #include <asm/types.h> #define DEBUG #ifdef DEBUG #define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt,__func__,## args) #else #define DPRINTK(fmt, args...) #endif static const u32 cfb_tab8_be[] = { 0x00000000,0x000000ff,0x0000ff00,0x0000ffff, 0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff, 0xff000000,0xff0000ff,0xff00ff00,0xff00ffff, 0xffff0000,0xffff00ff,0xffffff00,0xffffffff }; static const u32 cfb_tab8_le[] = { 0x00000000,0xff000000,0x00ff0000,0xffff0000, 0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00, 0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff, 0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff }; static const u32 cfb_tab16_be[] = { 0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff }; static const u32 cfb_tab16_le[] = { 0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff }; static const u32 cfb_tab32[] = { 0x00000000, 0xffffffff }; static void color_imageblit(const struct fb_image *image, struct fb_info *p, void *dst1, u32 start_index, u32 pitch_index) { /* Draw the penguin */ u32 *dst, *dst2; u32 color = 0, val, shift; int i, n, bpp = p->var.bits_per_pixel; u32 null_bits = 32 - bpp; u32 *palette = (u32 *) p->pseudo_palette; const u8 *src = image->data; dst2 = dst1; for (i = image->height; i--; ) { n = image->width; dst = dst1; shift = 0; val = 0; if (start_index) { u32 start_mask = ~(FB_SHIFT_HIGH(p, ~(u32)0, start_index)); val = *dst & start_mask; shift = start_index; } while (n--) { if (p->fix.visual == FB_VISUAL_TRUECOLOR || p->fix.visual == FB_VISUAL_DIRECTCOLOR ) color = palette[*src]; else color = *src; color <<= FB_LEFT_POS(p, bpp); val |= FB_SHIFT_HIGH(p, color, shift); if (shift >= null_bits) { *dst++ = val; val = (shift == null_bits) ? 0 : FB_SHIFT_LOW(p, color, 32 - shift); } shift += bpp; shift &= (32 - 1); src++; } if (shift) { u32 end_mask = FB_SHIFT_HIGH(p, ~(u32)0, shift); *dst &= end_mask; *dst |= val; } dst1 += p->fix.line_length; if (pitch_index) { dst2 += p->fix.line_length; dst1 = (u8 *)((long)dst2 & ~(sizeof(u32) - 1)); start_index += pitch_index; start_index &= 32 - 1; } } } static void slow_imageblit(const struct fb_image *image, struct fb_info *p, void *dst1, u32 fgcolor, u32 bgcolor, u32 start_index, u32 pitch_index) { u32 shift, color = 0, bpp = p->var.bits_per_pixel; u32 *dst, *dst2; u32 val, pitch = p->fix.line_length; u32 null_bits = 32 - bpp; u32 spitch = (image->width+7)/8; const u8 *src = image->data, *s; u32 i, j, l; dst2 = dst1; fgcolor <<= FB_LEFT_POS(p, bpp); bgcolor <<= FB_LEFT_POS(p, bpp); for (i = image->height; i--; ) { shift = val = 0; l = 8; j = image->width; dst = dst1; s = src; /* write leading bits */ if (start_index) { u32 start_mask = ~(FB_SHIFT_HIGH(p, ~(u32)0, start_index)); val = *dst & start_mask; shift = start_index; } while (j--) { l--; color = (*s & (1 << l)) ? fgcolor : bgcolor; val |= FB_SHIFT_HIGH(p, color, shift); /* Did the bitshift spill bits to the next long? */ if (shift >= null_bits) { *dst++ = val; val = (shift == null_bits) ? 0 : FB_SHIFT_LOW(p, color, 32 - shift); } shift += bpp; shift &= (32 - 1); if (!l) { l = 8; s++; } } /* write trailing bits */ if (shift) { u32 end_mask = FB_SHIFT_HIGH(p, ~(u32)0, shift); *dst &= end_mask; *dst |= val; } dst1 += pitch; src += spitch; if (pitch_index) { dst2 += pitch; dst1 = (u8 *)((long)dst2 & ~(sizeof(u32) - 1)); start_index += pitch_index; start_index &= 32 - 1; } } } /* * fast_imageblit - optimized monochrome color expansion * * Only if: bits_per_pixel == 8, 16, or 32 * image->width is divisible by pixel/dword (ppw); * fix->line_legth is divisible by 4; * beginning and end of a scanline is dword aligned */ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, void *dst1, u32 fgcolor, u32 bgcolor) { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; u32 bit_mask, eorx, shift; const u8 *s = image->data, *src; u32 *dst; const u32 *tab; size_t tablen; u32 colortab[16]; int i, j, k; switch (bpp) { case 8: tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le; tablen = 16; break; case 16: tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le; tablen = 4; break; case 32: tab = cfb_tab32; tablen = 2; break; default: return; } for (i = ppw-1; i--; ) { fgx <<= bpp; bgx <<= bpp; fgx |= fgcolor; bgx |= bgcolor; } bit_mask = (1 << ppw) - 1; eorx = fgx ^ bgx; k = image->width/ppw; for (i = 0; i < tablen; ++i) colortab[i] = (tab[i] & eorx) ^ bgx; for (i = image->height; i--; ) { dst = dst1; shift = 8; src = s; /* * Manually unroll the per-line copying loop for better * performance. This works until we processed the last * completely filled source byte (inclusive). */ switch (ppw) { case 4: /* 8 bpp */ for (j = k; j >= 2; j -= 2, ++src) { *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 0) & bit_mask]; } break; case 2: /* 16 bpp */ for (j = k; j >= 4; j -= 4, ++src) { *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 2) & bit_mask]; *dst++ = colortab[(*src >> 0) & bit_mask]; } break; case 1: /* 32 bpp */ for (j = k; j >= 8; j -= 8, ++src) { *dst++ = colortab[(*src >> 7) & bit_mask]; *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 5) & bit_mask]; *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 3) & bit_mask]; *dst++ = colortab[(*src >> 2) & bit_mask]; *dst++ = colortab[(*src >> 1) & bit_mask]; *dst++ = colortab[(*src >> 0) & bit_mask]; } break; } /* * For image widths that are not a multiple of 8, there * are trailing pixels left on the current line. Print * them as well. */ for (; j--; ) { shift -= ppw; *dst++ = colortab[(*src >> shift) & bit_mask]; if (!shift) { shift = 8; ++src; } } dst1 += p->fix.line_length; s += spitch; } } void sys_imageblit(struct fb_info *p, const struct fb_image *image) { u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0; u32 bpl = sizeof(u32), bpp = p->var.bits_per_pixel; u32 width = image->width; u32 dx = image->dx, dy = image->dy; void *dst1; if (p->state != FBINFO_STATE_RUNNING) return; if (!(p->flags & FBINFO_VIRTFB)) fb_warn_once(p, "Framebuffer is not in virtual address space."); bitstart = (dy * p->fix.line_length * 8) + (dx * bpp); start_index = bitstart & (32 - 1); pitch_index = (p->fix.line_length & (bpl - 1)) * 8; bitstart /= 8; bitstart &= ~(bpl - 1); dst1 = (void __force *)p->screen_base + bitstart; if (p->fbops->fb_sync) p->fbops->fb_sync(p); if (image->depth == 1) { if (p->fix.visual == FB_VISUAL_TRUECOLOR || p->fix.visual == FB_VISUAL_DIRECTCOLOR) { fgcolor = ((u32*)(p->pseudo_palette))[image->fg_color]; bgcolor = ((u32*)(p->pseudo_palette))[image->bg_color]; } else { fgcolor = image->fg_color; bgcolor = image->bg_color; } if (32 % bpp == 0 && !start_index && !pitch_index && ((width & (32/bpp-1)) == 0) && bpp >= 8 && bpp <= 32) fast_imageblit(image, p, dst1, fgcolor, bgcolor); else slow_imageblit(image, p, dst1, fgcolor, bgcolor, start_index, pitch_index); } else color_imageblit(image, p, dst1, start_index, pitch_index); } EXPORT_SYMBOL(sys_imageblit); MODULE_AUTHOR("Antonino Daplas <adaplas@pol.net>"); MODULE_DESCRIPTION("1-bit/8-bit to 1-32 bit color expansion (sys-to-sys)"); MODULE_LICENSE("GPL");
2 1 139 112 111 78 49 53 139 139 139 40 1 2 37 152 57 23 216 181 4 3 3 1 2 112 9 111 98 112 27 32 112 112 107 12 45 112 150 124 19 90 8 82 4 4 21 12 9 21 21 54 54 51 3 4 12 5 141 8 17 3 14 129 131 76 76 76 12 76 76 7 4 76 78 78 78 13 65 78 57 1 59 4 7 34 7 2 2 2 135 35 132 131 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer Timing queue handling * Copyright (c) 1998-1999 by Frank van de Pol <fvdpol@coil.demon.nl> * * MAJOR CHANGES * Nov. 13, 1999 Takashi Iwai <iwai@ww.uni-erlangen.de> * - Queues are allocated dynamically via ioctl. * - When owner client is deleted, all owned queues are deleted, too. * - Owner of unlocked queue is kept unmodified even if it is * manipulated by other clients. * - Owner field in SET_QUEUE_OWNER ioctl must be identical with the * caller client. i.e. Changing owner to a third client is not * allowed. * * Aug. 30, 2000 Takashi Iwai * - Queues are managed in static array again, but with better way. * The API itself is identical. * - The queue is locked when struct snd_seq_queue pointer is returned via * queueptr(). This pointer *MUST* be released afterward by * queuefree(ptr). * - Addition of experimental sync support. */ #include <linux/init.h> #include <linux/slab.h> #include <sound/core.h> #include "seq_memory.h" #include "seq_queue.h" #include "seq_clientmgr.h" #include "seq_fifo.h" #include "seq_timer.h" #include "seq_info.h" /* list of allocated queues */ static struct snd_seq_queue *queue_list[SNDRV_SEQ_MAX_QUEUES]; static DEFINE_SPINLOCK(queue_list_lock); /* number of queues allocated */ static int num_queues; int snd_seq_queue_get_cur_queues(void) { return num_queues; } /*----------------------------------------------------------------*/ /* assign queue id and insert to list */ static int queue_list_add(struct snd_seq_queue *q) { int i; guard(spinlock_irqsave)(&queue_list_lock); for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if (! queue_list[i]) { queue_list[i] = q; q->queue = i; num_queues++; return i; } } return -1; } static struct snd_seq_queue *queue_list_remove(int id, int client) { struct snd_seq_queue *q; guard(spinlock_irqsave)(&queue_list_lock); q = queue_list[id]; if (q) { guard(spinlock)(&q->owner_lock); if (q->owner == client) { /* found */ q->klocked = 1; queue_list[id] = NULL; num_queues--; return q; } } return NULL; } /*----------------------------------------------------------------*/ /* create new queue (constructor) */ static struct snd_seq_queue *queue_new(int owner, int locked) { struct snd_seq_queue *q; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; spin_lock_init(&q->owner_lock); spin_lock_init(&q->check_lock); mutex_init(&q->timer_mutex); snd_use_lock_init(&q->use_lock); q->queue = -1; q->tickq = snd_seq_prioq_new(); q->timeq = snd_seq_prioq_new(); q->timer = snd_seq_timer_new(); if (q->tickq == NULL || q->timeq == NULL || q->timer == NULL) { snd_seq_prioq_delete(&q->tickq); snd_seq_prioq_delete(&q->timeq); snd_seq_timer_delete(&q->timer); kfree(q); return NULL; } q->owner = owner; q->locked = locked; q->klocked = 0; return q; } /* delete queue (destructor) */ static void queue_delete(struct snd_seq_queue *q) { /* stop and release the timer */ mutex_lock(&q->timer_mutex); snd_seq_timer_stop(q->timer); snd_seq_timer_close(q); mutex_unlock(&q->timer_mutex); /* wait until access free */ snd_use_lock_sync(&q->use_lock); /* release resources... */ snd_seq_prioq_delete(&q->tickq); snd_seq_prioq_delete(&q->timeq); snd_seq_timer_delete(&q->timer); kfree(q); } /*----------------------------------------------------------------*/ /* delete all existing queues */ void snd_seq_queues_delete(void) { int i; /* clear list */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if (queue_list[i]) queue_delete(queue_list[i]); } } static void queue_use(struct snd_seq_queue *queue, int client, int use); /* allocate a new queue - * return pointer to new queue or ERR_PTR(-errno) for error * The new queue's use_lock is set to 1. It is the caller's responsibility to * call snd_use_lock_free(&q->use_lock). */ struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) { struct snd_seq_queue *q; q = queue_new(client, locked); if (q == NULL) return ERR_PTR(-ENOMEM); q->info_flags = info_flags; queue_use(q, client, 1); snd_use_lock_use(&q->use_lock); if (queue_list_add(q) < 0) { snd_use_lock_free(&q->use_lock); queue_delete(q); return ERR_PTR(-ENOMEM); } return q; } /* delete a queue - queue must be owned by the client */ int snd_seq_queue_delete(int client, int queueid) { struct snd_seq_queue *q; if (queueid < 0 || queueid >= SNDRV_SEQ_MAX_QUEUES) return -EINVAL; q = queue_list_remove(queueid, client); if (q == NULL) return -EINVAL; queue_delete(q); return 0; } /* return pointer to queue structure for specified id */ struct snd_seq_queue *queueptr(int queueid) { struct snd_seq_queue *q; if (queueid < 0 || queueid >= SNDRV_SEQ_MAX_QUEUES) return NULL; guard(spinlock_irqsave)(&queue_list_lock); q = queue_list[queueid]; if (q) snd_use_lock_use(&q->use_lock); return q; } /* return the (first) queue matching with the specified name */ struct snd_seq_queue *snd_seq_queue_find_name(char *name) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (q) { if (strncmp(q->name, name, sizeof(q->name)) == 0) return q; queuefree(q); } } return NULL; } /* -------------------------------------------------------- */ #define MAX_CELL_PROCESSES_IN_QUEUE 1000 void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { struct snd_seq_event_cell *cell; snd_seq_tick_time_t cur_tick; snd_seq_real_time_t cur_time; int processed = 0; if (q == NULL) return; /* make this function non-reentrant */ scoped_guard(spinlock_irqsave, &q->check_lock) { if (q->check_blocked) { q->check_again = 1; return; /* other thread is already checking queues */ } q->check_blocked = 1; } __again: /* Process tick queue... */ cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) goto out; /* the rest processed at the next batch */ } /* Process time queue... */ cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) goto out; /* the rest processed at the next batch */ } out: /* free lock */ scoped_guard(spinlock_irqsave, &q->check_lock) { if (q->check_again) { q->check_again = 0; if (processed < MAX_CELL_PROCESSES_IN_QUEUE) goto __again; } q->check_blocked = 0; } } /* enqueue a event to singe queue */ int snd_seq_enqueue_event(struct snd_seq_event_cell *cell, int atomic, int hop) { int dest, err; struct snd_seq_queue *q; if (snd_BUG_ON(!cell)) return -EINVAL; dest = cell->event.queue; /* destination queue */ q = queueptr(dest); if (q == NULL) return -EINVAL; /* handle relative time stamps, convert them into absolute */ if ((cell->event.flags & SNDRV_SEQ_TIME_MODE_MASK) == SNDRV_SEQ_TIME_MODE_REL) { switch (cell->event.flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: cell->event.time.tick += q->timer->tick.cur_tick; break; case SNDRV_SEQ_TIME_STAMP_REAL: snd_seq_inc_real_time(&cell->event.time.time, &q->timer->cur_time); break; } cell->event.flags &= ~SNDRV_SEQ_TIME_MODE_MASK; cell->event.flags |= SNDRV_SEQ_TIME_MODE_ABS; } /* enqueue event in the real-time or midi queue */ switch (cell->event.flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: err = snd_seq_prioq_cell_in(q->tickq, cell); break; case SNDRV_SEQ_TIME_STAMP_REAL: default: err = snd_seq_prioq_cell_in(q->timeq, cell); break; } if (err < 0) { queuefree(q); /* unlock */ return err; } /* trigger dispatching */ snd_seq_check_queue(q, atomic, hop); queuefree(q); /* unlock */ return 0; } /*----------------------------------------------------------------*/ static inline int check_access(struct snd_seq_queue *q, int client) { return (q->owner == client) || (!q->locked && !q->klocked); } /* check if the client has permission to modify queue parameters. * if it does, lock the queue */ static int queue_access_lock(struct snd_seq_queue *q, int client) { int access_ok; guard(spinlock_irqsave)(&q->owner_lock); access_ok = check_access(q, client); if (access_ok) q->klocked = 1; return access_ok; } /* unlock the queue */ static inline void queue_access_unlock(struct snd_seq_queue *q) { guard(spinlock_irqsave)(&q->owner_lock); q->klocked = 0; } /* exported - only checking permission */ int snd_seq_queue_check_access(int queueid, int client) { struct snd_seq_queue *q = queueptr(queueid); int access_ok; if (! q) return 0; scoped_guard(spinlock_irqsave, &q->owner_lock) access_ok = check_access(q, client); queuefree(q); return access_ok; } /*----------------------------------------------------------------*/ /* * change queue's owner and permission */ int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); if (q == NULL) return -EINVAL; if (! queue_access_lock(q, client)) { queuefree(q); return -EPERM; } scoped_guard(spinlock_irqsave, &q->owner_lock) { q->locked = locked ? 1 : 0; q->owner = client; } queue_access_unlock(q); queuefree(q); return 0; } /*----------------------------------------------------------------*/ /* open timer - * q->use mutex should be down before calling this function to avoid * confliction with snd_seq_queue_use() */ int snd_seq_queue_timer_open(int queueid) { int result = 0; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; tmr = queue->timer; result = snd_seq_timer_open(queue); if (result < 0) { snd_seq_timer_defaults(tmr); result = snd_seq_timer_open(queue); } queuefree(queue); return result; } /* close timer - * q->use mutex should be down before calling this function */ int snd_seq_queue_timer_close(int queueid) { struct snd_seq_queue *queue; int result = 0; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; snd_seq_timer_close(queue); queuefree(queue); return result; } /* change queue tempo and ppq */ int snd_seq_queue_timer_set_tempo(int queueid, int client, struct snd_seq_queue_tempo *info) { struct snd_seq_queue *q = queueptr(queueid); int result; if (q == NULL) return -EINVAL; if (! queue_access_lock(q, client)) { queuefree(q); return -EPERM; } result = snd_seq_timer_set_tempo_ppq(q->timer, info->tempo, info->ppq, info->tempo_base); if (result >= 0 && info->skew_base > 0) result = snd_seq_timer_set_skew(q->timer, info->skew_value, info->skew_base); queue_access_unlock(q); queuefree(q); return result; } /* use or unuse this queue */ static void queue_use(struct snd_seq_queue *queue, int client, int use) { if (use) { if (!test_and_set_bit(client, queue->clients_bitmap)) queue->clients++; } else { if (test_and_clear_bit(client, queue->clients_bitmap)) queue->clients--; } if (queue->clients) { if (use && queue->clients == 1) snd_seq_timer_defaults(queue->timer); snd_seq_timer_open(queue); } else { snd_seq_timer_close(queue); } } /* use or unuse this queue - * if it is the first client, starts the timer. * if it is not longer used by any clients, stop the timer. */ int snd_seq_queue_use(int queueid, int client, int use) { struct snd_seq_queue *queue; queue = queueptr(queueid); if (queue == NULL) return -EINVAL; mutex_lock(&queue->timer_mutex); queue_use(queue, client, use); mutex_unlock(&queue->timer_mutex); queuefree(queue); return 0; } /* * check if queue is used by the client * return negative value if the queue is invalid. * return 0 if not used, 1 if used. */ int snd_seq_queue_is_used(int queueid, int client) { struct snd_seq_queue *q; int result; q = queueptr(queueid); if (q == NULL) return -EINVAL; /* invalid queue */ result = test_bit(client, q->clients_bitmap) ? 1 : 0; queuefree(q); return result; } /*----------------------------------------------------------------*/ /* final stage notification - * remove cells for no longer exist client (for non-owned queue) * or delete this queue (for owned queue) */ void snd_seq_queue_client_leave(int client) { int i; struct snd_seq_queue *q; /* delete own queues from queue list */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queue_list_remove(i, client); if (q) queue_delete(q); } /* remove cells from existing queues - * they are not owned by this client */ for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; if (test_bit(client, q->clients_bitmap)) { snd_seq_prioq_leave(q->tickq, client, 0); snd_seq_prioq_leave(q->timeq, client, 0); snd_seq_queue_use(q->queue, client, 0); } queuefree(q); } } /*----------------------------------------------------------------*/ /* remove cells from all queues */ void snd_seq_queue_client_leave_cells(int client) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; snd_seq_prioq_leave(q->tickq, client, 0); snd_seq_prioq_leave(q->timeq, client, 0); queuefree(q); } } /* remove cells based on flush criteria */ void snd_seq_queue_remove_cells(int client, struct snd_seq_remove_events *info) { int i; struct snd_seq_queue *q; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; if (test_bit(client, q->clients_bitmap) && (! (info->remove_mode & SNDRV_SEQ_REMOVE_DEST) || q->queue == info->queue)) { snd_seq_prioq_remove_events(q->tickq, client, info); snd_seq_prioq_remove_events(q->timeq, client, info); } queuefree(q); } } /*----------------------------------------------------------------*/ /* * send events to all subscribed ports */ static void queue_broadcast_event(struct snd_seq_queue *q, struct snd_seq_event *ev, int atomic, int hop) { struct snd_seq_event sev; sev = *ev; sev.flags = SNDRV_SEQ_TIME_STAMP_TICK|SNDRV_SEQ_TIME_MODE_ABS; sev.time.tick = q->timer->tick.cur_tick; sev.queue = q->queue; sev.data.queue.queue = q->queue; /* broadcast events from Timer port */ sev.source.client = SNDRV_SEQ_CLIENT_SYSTEM; sev.source.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; sev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; snd_seq_kernel_client_dispatch(SNDRV_SEQ_CLIENT_SYSTEM, &sev, atomic, hop); } /* * process a received queue-control event. * this function is exported for seq_sync.c. */ static void snd_seq_queue_process_event(struct snd_seq_queue *q, struct snd_seq_event *ev, int atomic, int hop) { switch (ev->type) { case SNDRV_SEQ_EVENT_START: snd_seq_prioq_leave(q->tickq, ev->source.client, 1); snd_seq_prioq_leave(q->timeq, ev->source.client, 1); if (! snd_seq_timer_start(q->timer)) queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_CONTINUE: if (! snd_seq_timer_continue(q->timer)) queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_STOP: snd_seq_timer_stop(q->timer); queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_TEMPO: snd_seq_timer_set_tempo(q->timer, ev->data.queue.param.value); queue_broadcast_event(q, ev, atomic, hop); break; case SNDRV_SEQ_EVENT_SETPOS_TICK: if (snd_seq_timer_set_position_tick(q->timer, ev->data.queue.param.time.tick) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; case SNDRV_SEQ_EVENT_SETPOS_TIME: if (snd_seq_timer_set_position_time(q->timer, ev->data.queue.param.time.time) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; case SNDRV_SEQ_EVENT_QUEUE_SKEW: if (snd_seq_timer_set_skew(q->timer, ev->data.queue.param.skew.value, ev->data.queue.param.skew.base) == 0) { queue_broadcast_event(q, ev, atomic, hop); } break; } } /* * Queue control via timer control port: * this function is exported as a callback of timer port. */ int snd_seq_control_queue(struct snd_seq_event *ev, int atomic, int hop) { struct snd_seq_queue *q; if (snd_BUG_ON(!ev)) return -EINVAL; q = queueptr(ev->data.queue.queue); if (q == NULL) return -EINVAL; if (! queue_access_lock(q, ev->source.client)) { queuefree(q); return -EPERM; } snd_seq_queue_process_event(q, ev, atomic, hop); queue_access_unlock(q); queuefree(q); return 0; } /*----------------------------------------------------------------*/ #ifdef CONFIG_SND_PROC_FS /* exported to seq_info.c */ void snd_seq_info_queues_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; bool locked; int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { q = queueptr(i); if (!q) continue; tmr = q->timer; if (tmr->tempo) bpm = (60000 * tmr->tempo_base) / tmr->tempo; else bpm = 0; scoped_guard(spinlock_irq, &q->owner_lock) { locked = q->locked; owner = q->owner; } snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); snd_iprintf(buffer, "owned by client : %d\n", owner); snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); snd_iprintf(buffer, "timer PPQ : %d\n", tmr->ppq); snd_iprintf(buffer, "current tempo : %d\n", tmr->tempo); snd_iprintf(buffer, "tempo base : %d ns\n", tmr->tempo_base); snd_iprintf(buffer, "current BPM : %d\n", bpm); snd_iprintf(buffer, "current time : %d.%09d s\n", tmr->cur_time.tv_sec, tmr->cur_time.tv_nsec); snd_iprintf(buffer, "current tick : %d\n", tmr->tick.cur_tick); snd_iprintf(buffer, "\n"); queuefree(q); } } #endif /* CONFIG_SND_PROC_FS */
15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 /* * netfilter module to limit the number of parallel tcp * connections per IP address. * (c) 2000 Gerd Knorr <kraxel@bytesex.org> * Nov 2002: Martin Bene <martin.bene@icomedias.com>: * only ignore TIME_WAIT or gone connections * (C) CC Computer Consultants GmbH, 2007 * * based on ... * * Kernel module to match connection tracking information. * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_connlimit.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_count.h> static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; u32 key[5]; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), xt_family(par), net, &tuple)) { goto hotdrop; } if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); union nf_inet_addr addr; unsigned int i; memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) addr.ip6[i] &= info->mask.ip6[i]; memcpy(key, &addr, sizeof(addr.ip6)); key[4] = zone->id; } else { const struct iphdr *iph = ip_hdr(skb); key[0] = (info->flags & XT_CONNLIMIT_DADDR) ? (__force __u32)iph->daddr : (__force __u32)iph->saddr; key[0] &= (__force __u32)info->mask.ip; key[1] = zone->id; } connections = nf_conncount_count(net, info->data, key, tuple_ptr, zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT); hotdrop: par->hotdrop = true; return false; } static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int keylen; keylen = sizeof(u32); if (par->family == NFPROTO_IPV6) keylen += sizeof(struct in6_addr); else keylen += sizeof(struct in_addr); /* init private data */ info->data = nf_conncount_init(par->net, par->family, keylen); return PTR_ERR_OR_ZERO(info->data); } static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; nf_conncount_destroy(par->net, par->family, info->data); } static struct xt_match connlimit_mt_reg __read_mostly = { .name = "connlimit", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = connlimit_mt_check, .match = connlimit_mt, .matchsize = sizeof(struct xt_connlimit_info), .usersize = offsetof(struct xt_connlimit_info, data), .destroy = connlimit_mt_destroy, .me = THIS_MODULE, }; static int __init connlimit_mt_init(void) { return xt_register_match(&connlimit_mt_reg); } static void __exit connlimit_mt_exit(void) { xt_unregister_match(&connlimit_mt_reg); } module_init(connlimit_mt_init); module_exit(connlimit_mt_exit); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Number of connections matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connlimit"); MODULE_ALIAS("ip6t_connlimit");
17 4 48 6 53 26 26 17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 // SPDX-License-Identifier: GPL-2.0 #include <linux/buffer_head.h> #include <linux/slab.h> #include "minix.h" enum {DEPTH = 3, DIRECT = 7}; /* Only double indirect */ typedef u16 block_t; /* 16 bit, host order */ static inline unsigned long block_to_cpu(block_t n) { return n; } static inline block_t cpu_to_block(unsigned long n) { return n; } static inline block_t *i_data(struct inode *inode) { return (block_t *)minix_i(inode)->u.i1_data; } static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) { int n = 0; if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); return 0; } if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) return 0; if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 512) { offsets[n++] = 7; offsets[n++] = block; } else { block -= 512; offsets[n++] = 8; offsets[n++] = block>>9; offsets[n++] = block & 511; } return n; } #include "itree_common.c" int V1_minix_get_block(struct inode * inode, long block, struct buffer_head *bh_result, int create) { return get_block(inode, block, bh_result, create); } void V1_minix_truncate(struct inode * inode) { truncate(inode); } unsigned V1_minix_blocks(loff_t size, struct super_block *sb) { return nblocks(size, sb); }
485 37 493 235 426 498 477 479 19 19 5 5 5 5 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 /* * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin * cleaned up code to current version of sparse and added the slicing-by-8 * algorithm to the closely similar existing slicing-by-4 algorithm. * * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! * Code was from the public domain, copyright abandoned. Code was * subsequently included in the kernel, thus was re-licensed under the * GNU GPL v2. * * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> * Same crc32 function was used in 5 other places in the kernel. * I made one version, and deleted the others. * There are various incantations of crc32(). Some use a seed of 0 or ~0. * Some xor at the end with ~0. The generic crc32() function takes * seed as an argument, and doesn't xor at the end. Then individual * users can do whatever they need. * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. * fs/jffs2 uses seed 0, doesn't xor with ~0. * fs/partitions/efi.c uses seed ~0, xor's with ~0. * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ /* see: Documentation/staging/crc32.rst for a description of algorithms */ #include <linux/crc32.h> #include <linux/crc32poly.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> #include "crc32defs.h" #if CRC_LE_BITS > 8 # define tole(x) ((__force u32) cpu_to_le32(x)) #else # define tole(x) (x) #endif #if CRC_BE_BITS > 8 # define tobe(x) ((__force u32) cpu_to_be32(x)) #else # define tobe(x) (x) #endif #include "crc32table.h" MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ static inline u32 __pure crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) { # ifdef __LITTLE_ENDIAN # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) # define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) # define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) # else # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) # define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) # define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) # endif const u32 *b; size_t rem_len; # ifdef CONFIG_X86 size_t i; # endif const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; # if CRC_LE_BITS != 32 const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; # endif u32 q; /* Align it */ if (unlikely((long)buf & 3 && len)) { do { DO_CRC(*buf++); } while ((--len) && ((long)buf)&3); } # if CRC_LE_BITS == 32 rem_len = len & 3; len = len >> 2; # else rem_len = len & 7; len = len >> 3; # endif b = (const u32 *)buf; # ifdef CONFIG_X86 --b; for (i = 0; i < len; i++) { # else for (--b; len; --len) { # endif q = crc ^ *++b; /* use pre increment for speed */ # if CRC_LE_BITS == 32 crc = DO_CRC4; # else crc = DO_CRC8; q = *++b; crc ^= DO_CRC4; # endif } len = rem_len; /* And the last few bytes */ if (len) { u8 *p = (u8 *)(b + 1) - 1; # ifdef CONFIG_X86 for (i = 0; i < len; i++) DO_CRC(*++p); /* use pre increment for speed */ # else do { DO_CRC(*++p); /* use pre increment for speed */ } while (--len); # endif } return crc; #undef DO_CRC #undef DO_CRC4 #undef DO_CRC8 } #endif /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II * CRC32/CRC32C * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other * uses, or the previous crc32/crc32c value if computing incrementally. * @p: pointer to buffer over which CRC32/CRC32C is run * @len: length of buffer @p * @tab: little-endian Ethernet table * @polynomial: CRC32/CRC32c LE polynomial */ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, size_t len, const u32 (*tab)[256], u32 polynomial) { #if CRC_LE_BITS == 1 int i; while (len--) { crc ^= *p++; for (i = 0; i < 8; i++) crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0); } # elif CRC_LE_BITS == 2 while (len--) { crc ^= *p++; crc = (crc >> 2) ^ tab[0][crc & 3]; crc = (crc >> 2) ^ tab[0][crc & 3]; crc = (crc >> 2) ^ tab[0][crc & 3]; crc = (crc >> 2) ^ tab[0][crc & 3]; } # elif CRC_LE_BITS == 4 while (len--) { crc ^= *p++; crc = (crc >> 4) ^ tab[0][crc & 15]; crc = (crc >> 4) ^ tab[0][crc & 15]; } # elif CRC_LE_BITS == 8 /* aka Sarwate algorithm */ while (len--) { crc ^= *p++; crc = (crc >> 8) ^ tab[0][crc & 255]; } # else crc = (__force u32) __cpu_to_le32(crc); crc = crc32_body(crc, p, len, tab); crc = __le32_to_cpu((__force __le32)crc); #endif return crc; } #if CRC_LE_BITS == 1 u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); } u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit * represents the highest power of x, and the msbit represents x^0. */ static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus) { u32 product = x & 1 ? y : 0; int i; for (i = 0; i < 31; i++) { product = (product >> 1) ^ (product & 1 ? modulus : 0); x >>= 1; product ^= x & 1 ? y : 0; } return product; } /** * crc32_generic_shift - Append @len 0 bytes to crc, in logarithmic time * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient) * @len: The number of bytes. @crc is multiplied by x^(8*@len) * @polynomial: The modulus used to reduce the result to 32 bits. * * It's possible to parallelize CRC computations by computing a CRC * over separate ranges of a buffer, then summing them. * This shifts the given CRC by 8*len bits (i.e. produces the same effect * as appending len bytes of zero to the data), in time proportional * to log(len). */ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, u32 polynomial) { u32 power = polynomial; /* CRC of x^32 */ int i; /* Shift up to 32 bits in the simple linear way */ for (i = 0; i < 8 * (int)(len & 3); i++) crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0); len >>= 2; if (!len) return crc; for (;;) { /* "power" is x^(2^i), modulo the polynomial */ if (len & 1) crc = gf2_multiply(crc, power, polynomial); len >>= 1; if (!len) break; /* Square power, advancing to x^(2^(i+1)) */ power = gf2_multiply(power, power, polynomial); } return crc; } u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) { return crc32_generic_shift(crc, len, CRC32_POLY_LE); } u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) { return crc32_generic_shift(crc, len, CRC32C_POLY_LE); } EXPORT_SYMBOL(crc32_le_shift); EXPORT_SYMBOL(__crc32c_le_shift); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for * other uses, or the previous crc32 value if computing incrementally. * @p: pointer to buffer over which CRC32 is run * @len: length of buffer @p * @tab: big-endian Ethernet table * @polynomial: CRC32 BE polynomial */ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, size_t len, const u32 (*tab)[256], u32 polynomial) { #if CRC_BE_BITS == 1 int i; while (len--) { crc ^= *p++ << 24; for (i = 0; i < 8; i++) crc = (crc << 1) ^ ((crc & 0x80000000) ? polynomial : 0); } # elif CRC_BE_BITS == 2 while (len--) { crc ^= *p++ << 24; crc = (crc << 2) ^ tab[0][crc >> 30]; crc = (crc << 2) ^ tab[0][crc >> 30]; crc = (crc << 2) ^ tab[0][crc >> 30]; crc = (crc << 2) ^ tab[0][crc >> 30]; } # elif CRC_BE_BITS == 4 while (len--) { crc ^= *p++ << 24; crc = (crc << 4) ^ tab[0][crc >> 28]; crc = (crc << 4) ^ tab[0][crc >> 28]; } # elif CRC_BE_BITS == 8 while (len--) { crc ^= *p++ << 24; crc = (crc << 8) ^ tab[0][crc >> 24]; } # else crc = (__force u32) __cpu_to_be32(crc); crc = crc32_body(crc, p, len, tab); crc = __be32_to_cpu((__force __be32)crc); # endif return crc; } #if CRC_BE_BITS == 1 u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif EXPORT_SYMBOL(crc32_be);
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 // SPDX-License-Identifier: GPL-2.0-only /* * Driver for the mt9m111 sensor * * Copyright (C) 2008 Erik Andrén * Copyright (C) 2007 Ilyes Gouta. Based on the m5603x Linux Driver Project. * Copyright (C) 2005 m5603x Linux Driver Project <m5602@x3ng.com.br> * * Portions of code to USB interface and ALi driver software, * Copyright (c) 2006 Willem Duinker * v4l2 interface modeled after the V4L2 driver * for SN9C10x PC Camera Controllers */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "m5602_mt9m111.h" static int mt9m111_s_ctrl(struct v4l2_ctrl *ctrl); static void mt9m111_dump_registers(struct sd *sd); static const unsigned char preinit_mt9m111[][4] = { {BRIDGE, M5602_XB_MCU_CLK_DIV, 0x02, 0x00}, {BRIDGE, M5602_XB_MCU_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x00, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x0d, 0x00}, {BRIDGE, M5602_XB_SENSOR_CTRL, 0x00, 0x00}, {BRIDGE, M5602_XB_ADC_CTRL, 0xc0, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x09, 0x00}, {SENSOR, MT9M111_PAGE_MAP, 0x00, 0x00}, {SENSOR, MT9M111_SC_RESET, MT9M111_RESET | MT9M111_RESTART | MT9M111_ANALOG_STANDBY | MT9M111_CHIP_DISABLE, MT9M111_SHOW_BAD_FRAMES | MT9M111_RESTART_BAD_FRAMES | MT9M111_SYNCHRONIZE_CHANGES}, {BRIDGE, M5602_XB_GPIO_DIR, 0x05, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x04, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_H, 0x3e, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR_H, 0x3e, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT_H, 0x02, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_L, 0xff, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR_L, 0xff, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT_L, 0x00, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x00, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR, 0x07, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x0b, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_H, 0x06, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_L, 0x00, 0x00}, {BRIDGE, M5602_XB_I2C_CLK_DIV, 0x0a, 0x00} }; static const unsigned char init_mt9m111[][4] = { {BRIDGE, M5602_XB_MCU_CLK_DIV, 0x02, 0x00}, {BRIDGE, M5602_XB_MCU_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x00, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_ADC_CTRL, 0xc0, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x09, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_H, 0x06, 0x00}, {BRIDGE, M5602_XB_GPIO_EN_L, 0x00, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x04, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR_H, 0x3e, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR_L, 0xff, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT_H, 0x02, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT_L, 0x00, 0x00}, {BRIDGE, M5602_XB_GPIO_DIR, 0x07, 0x00}, {BRIDGE, M5602_XB_GPIO_DAT, 0x0b, 0x00}, {BRIDGE, M5602_XB_I2C_CLK_DIV, 0x0a, 0x00}, {SENSOR, MT9M111_SC_RESET, 0x00, 0x29}, {SENSOR, MT9M111_PAGE_MAP, 0x00, 0x00}, {SENSOR, MT9M111_SC_RESET, 0x00, 0x08}, {SENSOR, MT9M111_PAGE_MAP, 0x00, 0x01}, {SENSOR, MT9M111_CP_OPERATING_MODE_CTL, 0x00, MT9M111_CP_OPERATING_MODE_CTL}, {SENSOR, MT9M111_CP_LENS_CORRECTION_1, 0x04, 0x2a}, {SENSOR, MT9M111_CP_DEFECT_CORR_CONTEXT_A, 0x00, MT9M111_2D_DEFECT_CORRECTION_ENABLE}, {SENSOR, MT9M111_CP_DEFECT_CORR_CONTEXT_B, 0x00, MT9M111_2D_DEFECT_CORRECTION_ENABLE}, {SENSOR, MT9M111_CP_LUMA_OFFSET, 0x00, 0x00}, {SENSOR, MT9M111_CP_LUMA_CLIP, 0xff, 0x00}, {SENSOR, MT9M111_CP_OUTPUT_FORMAT_CTL2_CONTEXT_A, 0x14, 0x00}, {SENSOR, MT9M111_CP_OUTPUT_FORMAT_CTL2_CONTEXT_B, 0x14, 0x00}, {SENSOR, 0xcd, 0x00, 0x0e}, {SENSOR, 0xd0, 0x00, 0x40}, {SENSOR, MT9M111_PAGE_MAP, 0x00, 0x02}, {SENSOR, MT9M111_CC_AUTO_EXPOSURE_PARAMETER_18, 0x00, 0x00}, {SENSOR, MT9M111_CC_AWB_PARAMETER_7, 0xef, 0x03}, {SENSOR, MT9M111_PAGE_MAP, 0x00, 0x00}, {SENSOR, 0x33, 0x03, 0x49}, {SENSOR, 0x34, 0xc0, 0x19}, {SENSOR, 0x3f, 0x20, 0x20}, {SENSOR, 0x40, 0x20, 0x20}, {SENSOR, 0x5a, 0xc0, 0x0a}, {SENSOR, 0x70, 0x7b, 0x0a}, {SENSOR, 0x71, 0xff, 0x00}, {SENSOR, 0x72, 0x19, 0x0e}, {SENSOR, 0x73, 0x18, 0x0f}, {SENSOR, 0x74, 0x57, 0x32}, {SENSOR, 0x75, 0x56, 0x34}, {SENSOR, 0x76, 0x73, 0x35}, {SENSOR, 0x77, 0x30, 0x12}, {SENSOR, 0x78, 0x79, 0x02}, {SENSOR, 0x79, 0x75, 0x06}, {SENSOR, 0x7a, 0x77, 0x0a}, {SENSOR, 0x7b, 0x78, 0x09}, {SENSOR, 0x7c, 0x7d, 0x06}, {SENSOR, 0x7d, 0x31, 0x10}, {SENSOR, 0x7e, 0x00, 0x7e}, {SENSOR, 0x80, 0x59, 0x04}, {SENSOR, 0x81, 0x59, 0x04}, {SENSOR, 0x82, 0x57, 0x0a}, {SENSOR, 0x83, 0x58, 0x0b}, {SENSOR, 0x84, 0x47, 0x0c}, {SENSOR, 0x85, 0x48, 0x0e}, {SENSOR, 0x86, 0x5b, 0x02}, {SENSOR, 0x87, 0x00, 0x5c}, {SENSOR, MT9M111_CONTEXT_CONTROL, 0x00, MT9M111_SEL_CONTEXT_B}, {SENSOR, 0x60, 0x00, 0x80}, {SENSOR, 0x61, 0x00, 0x00}, {SENSOR, 0x62, 0x00, 0x00}, {SENSOR, 0x63, 0x00, 0x00}, {SENSOR, 0x64, 0x00, 0x00}, {SENSOR, MT9M111_SC_ROWSTART, 0x00, 0x0d}, /* 13 */ {SENSOR, MT9M111_SC_COLSTART, 0x00, 0x12}, /* 18 */ {SENSOR, MT9M111_SC_WINDOW_HEIGHT, 0x04, 0x00}, /* 1024 */ {SENSOR, MT9M111_SC_WINDOW_WIDTH, 0x05, 0x10}, /* 1296 */ {SENSOR, MT9M111_SC_HBLANK_CONTEXT_B, 0x01, 0x60}, /* 352 */ {SENSOR, MT9M111_SC_VBLANK_CONTEXT_B, 0x00, 0x11}, /* 17 */ {SENSOR, MT9M111_SC_HBLANK_CONTEXT_A, 0x01, 0x60}, /* 352 */ {SENSOR, MT9M111_SC_VBLANK_CONTEXT_A, 0x00, 0x11}, /* 17 */ {SENSOR, MT9M111_SC_R_MODE_CONTEXT_A, 0x01, 0x0f}, /* 271 */ {SENSOR, 0x30, 0x04, 0x00}, /* Set number of blank rows chosen to 400 */ {SENSOR, MT9M111_SC_SHUTTER_WIDTH, 0x01, 0x90}, }; static const unsigned char start_mt9m111[][4] = { {BRIDGE, M5602_XB_SEN_CLK_DIV, 0x06, 0x00}, {BRIDGE, M5602_XB_SEN_CLK_CTRL, 0xb0, 0x00}, {BRIDGE, M5602_XB_ADC_CTRL, 0xc0, 0x00}, {BRIDGE, M5602_XB_SENSOR_TYPE, 0x09, 0x00}, {BRIDGE, M5602_XB_LINE_OF_FRAME_H, 0x81, 0x00}, {BRIDGE, M5602_XB_PIX_OF_LINE_H, 0x82, 0x00}, {BRIDGE, M5602_XB_SIG_INI, 0x01, 0x00}, {BRIDGE, M5602_XB_VSYNC_PARA, 0x00, 0x00}, {BRIDGE, M5602_XB_VSYNC_PARA, 0x00, 0x00}, {BRIDGE, M5602_XB_VSYNC_PARA, 0x00, 0x00}, {BRIDGE, M5602_XB_VSYNC_PARA, 0x00, 0x00}, }; static struct v4l2_pix_format mt9m111_modes[] = { { 640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .sizeimage = 640 * 480, .bytesperline = 640, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0 } }; static const struct v4l2_ctrl_ops mt9m111_ctrl_ops = { .s_ctrl = mt9m111_s_ctrl, }; static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = { .ops = &mt9m111_ctrl_ops, .id = M5602_V4L2_CID_GREEN_BALANCE, .name = "Green Balance", .type = V4L2_CTRL_TYPE_INTEGER, .min = 0, .max = 0x7ff, .step = 1, .def = MT9M111_GREEN_GAIN_DEFAULT, .flags = V4L2_CTRL_FLAG_SLIDER, }; int mt9m111_probe(struct sd *sd) { u8 data[2] = {0x00, 0x00}; int i, err; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; if (force_sensor) { if (force_sensor == MT9M111_SENSOR) { pr_info("Forcing a %s sensor\n", mt9m111.name); goto sensor_found; } /* If we want to force another sensor, don't try to probe this * one */ return -ENODEV; } gspca_dbg(gspca_dev, D_PROBE, "Probing for a mt9m111 sensor\n"); /* Do the preinit */ for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) { if (preinit_mt9m111[i][0] == BRIDGE) { err = m5602_write_bridge(sd, preinit_mt9m111[i][1], preinit_mt9m111[i][2]); } else { data[0] = preinit_mt9m111[i][2]; data[1] = preinit_mt9m111[i][3]; err = m5602_write_sensor(sd, preinit_mt9m111[i][1], data, 2); } if (err < 0) return err; } if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2)) return -ENODEV; if ((data[0] == 0x14) && (data[1] == 0x3a)) { pr_info("Detected a mt9m111 sensor\n"); goto sensor_found; } return -ENODEV; sensor_found: sd->gspca_dev.cam.cam_mode = mt9m111_modes; sd->gspca_dev.cam.nmodes = ARRAY_SIZE(mt9m111_modes); return 0; } int mt9m111_init(struct sd *sd) { int i, err = 0; /* Init the sensor */ for (i = 0; i < ARRAY_SIZE(init_mt9m111) && !err; i++) { u8 data[2]; if (init_mt9m111[i][0] == BRIDGE) { err = m5602_write_bridge(sd, init_mt9m111[i][1], init_mt9m111[i][2]); } else { data[0] = init_mt9m111[i][2]; data[1] = init_mt9m111[i][3]; err = m5602_write_sensor(sd, init_mt9m111[i][1], data, 2); } } if (dump_sensor) mt9m111_dump_registers(sd); return 0; } int mt9m111_init_controls(struct sd *sd) { struct v4l2_ctrl_handler *hdl = &sd->gspca_dev.ctrl_handler; sd->gspca_dev.vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 7); sd->auto_white_bal = v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_AUTO_WHITE_BALANCE, 0, 1, 1, 0); sd->green_bal = v4l2_ctrl_new_custom(hdl, &mt9m111_greenbal_cfg, NULL); sd->red_bal = v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_RED_BALANCE, 0, 0x7ff, 1, MT9M111_RED_GAIN_DEFAULT); sd->blue_bal = v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_BLUE_BALANCE, 0, 0x7ff, 1, MT9M111_BLUE_GAIN_DEFAULT); v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_GAIN, 0, (INITIAL_MAX_GAIN - 1) * 2 * 2 * 2, 1, MT9M111_DEFAULT_GAIN); sd->hflip = v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); sd->vflip = v4l2_ctrl_new_std(hdl, &mt9m111_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_auto_cluster(4, &sd->auto_white_bal, 0, false); v4l2_ctrl_cluster(2, &sd->hflip); return 0; } int mt9m111_start(struct sd *sd) { int i, err = 0; u8 data[2]; struct cam *cam = &sd->gspca_dev.cam; struct gspca_dev *gspca_dev = (struct gspca_dev *)sd; int width = cam->cam_mode[sd->gspca_dev.curr_mode].width - 1; int height = cam->cam_mode[sd->gspca_dev.curr_mode].height; for (i = 0; i < ARRAY_SIZE(start_mt9m111) && !err; i++) { if (start_mt9m111[i][0] == BRIDGE) { err = m5602_write_bridge(sd, start_mt9m111[i][1], start_mt9m111[i][2]); } else { data[0] = start_mt9m111[i][2]; data[1] = start_mt9m111[i][3]; err = m5602_write_sensor(sd, start_mt9m111[i][1], data, 2); } } if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, (height >> 8) & 0xff); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, (height & 0xff)); if (err < 0) return err; for (i = 0; i < 2 && !err; i++) err = m5602_write_bridge(sd, M5602_XB_VSYNC_PARA, 0); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 0); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 2); if (err < 0) return err; for (i = 0; i < 2 && !err; i++) err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA, 0); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA, (width >> 8) & 0xff); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_HSYNC_PARA, width & 0xff); if (err < 0) return err; err = m5602_write_bridge(sd, M5602_XB_SIG_INI, 0); if (err < 0) return err; switch (width) { case 640: gspca_dbg(gspca_dev, D_CONF, "Configuring camera for VGA mode\n"); break; case 320: gspca_dbg(gspca_dev, D_CONF, "Configuring camera for QVGA mode\n"); break; } return err; } void mt9m111_disconnect(struct sd *sd) { sd->sensor = NULL; } static int mt9m111_set_hvflip(struct gspca_dev *gspca_dev) { int err; u8 data[2] = {0x00, 0x00}; struct sd *sd = (struct sd *) gspca_dev; int hflip; int vflip; gspca_dbg(gspca_dev, D_CONF, "Set hvflip to %d %d\n", sd->hflip->val, sd->vflip->val); /* The mt9m111 is flipped by default */ hflip = !sd->hflip->val; vflip = !sd->vflip->val; /* Set the correct page map */ err = m5602_write_sensor(sd, MT9M111_PAGE_MAP, data, 2); if (err < 0) return err; data[0] = MT9M111_RMB_OVER_SIZED; if (gspca_dev->pixfmt.width == 640) { data[1] = MT9M111_RMB_ROW_SKIP_2X | MT9M111_RMB_COLUMN_SKIP_2X | (hflip << 1) | vflip; } else { data[1] = MT9M111_RMB_ROW_SKIP_4X | MT9M111_RMB_COLUMN_SKIP_4X | (hflip << 1) | vflip; } err = m5602_write_sensor(sd, MT9M111_SC_R_MODE_CONTEXT_B, data, 2); return err; } static int mt9m111_set_auto_white_balance(struct gspca_dev *gspca_dev, __s32 val) { struct sd *sd = (struct sd *) gspca_dev; int err; u8 data[2]; err = m5602_read_sensor(sd, MT9M111_CP_OPERATING_MODE_CTL, data, 2); if (err < 0) return err; data[1] = ((data[1] & 0xfd) | ((val & 0x01) << 1)); err = m5602_write_sensor(sd, MT9M111_CP_OPERATING_MODE_CTL, data, 2); gspca_dbg(gspca_dev, D_CONF, "Set auto white balance %d\n", val); return err; } static int mt9m111_set_gain(struct gspca_dev *gspca_dev, __s32 val) { int err, tmp; u8 data[2] = {0x00, 0x00}; struct sd *sd = (struct sd *) gspca_dev; /* Set the correct page map */ err = m5602_write_sensor(sd, MT9M111_PAGE_MAP, data, 2); if (err < 0) return err; if (val >= INITIAL_MAX_GAIN * 2 * 2 * 2) return -EINVAL; if ((val >= INITIAL_MAX_GAIN * 2 * 2) && (val < (INITIAL_MAX_GAIN - 1) * 2 * 2 * 2)) tmp = (1 << 10) | (val << 9) | (val << 8) | (val / 8); else if ((val >= INITIAL_MAX_GAIN * 2) && (val < INITIAL_MAX_GAIN * 2 * 2)) tmp = (1 << 9) | (1 << 8) | (val / 4); else if ((val >= INITIAL_MAX_GAIN) && (val < INITIAL_MAX_GAIN * 2)) tmp = (1 << 8) | (val / 2); else tmp = val; data[1] = (tmp & 0xff); data[0] = (tmp & 0xff00) >> 8; gspca_dbg(gspca_dev, D_CONF, "tmp=%d, data[1]=%d, data[0]=%d\n", tmp, data[1], data[0]); err = m5602_write_sensor(sd, MT9M111_SC_GLOBAL_GAIN, data, 2); return err; } static int mt9m111_set_green_balance(struct gspca_dev *gspca_dev, __s32 val) { int err; u8 data[2]; struct sd *sd = (struct sd *) gspca_dev; data[1] = (val & 0xff); data[0] = (val & 0xff00) >> 8; gspca_dbg(gspca_dev, D_CONF, "Set green balance %d\n", val); err = m5602_write_sensor(sd, MT9M111_SC_GREEN_1_GAIN, data, 2); if (err < 0) return err; return m5602_write_sensor(sd, MT9M111_SC_GREEN_2_GAIN, data, 2); } static int mt9m111_set_blue_balance(struct gspca_dev *gspca_dev, __s32 val) { u8 data[2]; struct sd *sd = (struct sd *) gspca_dev; data[1] = (val & 0xff); data[0] = (val & 0xff00) >> 8; gspca_dbg(gspca_dev, D_CONF, "Set blue balance %d\n", val); return m5602_write_sensor(sd, MT9M111_SC_BLUE_GAIN, data, 2); } static int mt9m111_set_red_balance(struct gspca_dev *gspca_dev, __s32 val) { u8 data[2]; struct sd *sd = (struct sd *) gspca_dev; data[1] = (val & 0xff); data[0] = (val & 0xff00) >> 8; gspca_dbg(gspca_dev, D_CONF, "Set red balance %d\n", val); return m5602_write_sensor(sd, MT9M111_SC_RED_GAIN, data, 2); } static int mt9m111_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *) gspca_dev; int err; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_AUTO_WHITE_BALANCE: err = mt9m111_set_auto_white_balance(gspca_dev, ctrl->val); if (err || ctrl->val) return err; err = mt9m111_set_green_balance(gspca_dev, sd->green_bal->val); if (err) return err; err = mt9m111_set_red_balance(gspca_dev, sd->red_bal->val); if (err) return err; err = mt9m111_set_blue_balance(gspca_dev, sd->blue_bal->val); break; case V4L2_CID_GAIN: err = mt9m111_set_gain(gspca_dev, ctrl->val); break; case V4L2_CID_HFLIP: err = mt9m111_set_hvflip(gspca_dev); break; default: return -EINVAL; } return err; } static void mt9m111_dump_registers(struct sd *sd) { u8 address, value[2] = {0x00, 0x00}; pr_info("Dumping the mt9m111 register state\n"); pr_info("Dumping the mt9m111 sensor core registers\n"); value[1] = MT9M111_SENSOR_CORE; m5602_write_sensor(sd, MT9M111_PAGE_MAP, value, 2); for (address = 0; address < 0xff; address++) { m5602_read_sensor(sd, address, value, 2); pr_info("register 0x%x contains 0x%x%x\n", address, value[0], value[1]); } pr_info("Dumping the mt9m111 color pipeline registers\n"); value[1] = MT9M111_COLORPIPE; m5602_write_sensor(sd, MT9M111_PAGE_MAP, value, 2); for (address = 0; address < 0xff; address++) { m5602_read_sensor(sd, address, value, 2); pr_info("register 0x%x contains 0x%x%x\n", address, value[0], value[1]); } pr_info("Dumping the mt9m111 camera control registers\n"); value[1] = MT9M111_CAMERA_CONTROL; m5602_write_sensor(sd, MT9M111_PAGE_MAP, value, 2); for (address = 0; address < 0xff; address++) { m5602_read_sensor(sd, address, value, 2); pr_info("register 0x%x contains 0x%x%x\n", address, value[0], value[1]); } pr_info("mt9m111 register state dump complete\n"); }
5 5 1 5 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 8 2 6 3 1 1 2 2 2 1 1 3 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 // SPDX-License-Identifier: GPL-2.0-only /* * vivid-vbi-cap.c - vbi capture support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/videodev2.h> #include <media/v4l2-common.h> #include "vivid-core.h" #include "vivid-kthread-cap.h" #include "vivid-vbi-cap.h" #include "vivid-vbi-gen.h" #include "vivid-vid-common.h" static void vivid_sliced_vbi_cap_fill(struct vivid_dev *dev, unsigned seqnr) { struct vivid_vbi_gen_data *vbi_gen = &dev->vbi_gen; bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; vivid_vbi_gen_sliced(vbi_gen, is_60hz, seqnr); if (!is_60hz) { if (vivid_vid_can_loop(dev)) { if (dev->vbi_out_have_wss) { vbi_gen->data[12].data[0] = dev->vbi_out_wss[0]; vbi_gen->data[12].data[1] = dev->vbi_out_wss[1]; } else { vbi_gen->data[12].id = 0; } } else { switch (tpg_g_video_aspect(&dev->tpg)) { case TPG_VIDEO_ASPECT_14X9_CENTRE: vbi_gen->data[12].data[0] = 0x01; break; case TPG_VIDEO_ASPECT_16X9_CENTRE: vbi_gen->data[12].data[0] = 0x0b; break; case TPG_VIDEO_ASPECT_16X9_ANAMORPHIC: vbi_gen->data[12].data[0] = 0x07; break; case TPG_VIDEO_ASPECT_4X3: default: vbi_gen->data[12].data[0] = 0x08; break; } } } else if (vivid_vid_can_loop(dev) && is_60hz) { if (dev->vbi_out_have_cc[0]) { vbi_gen->data[0].data[0] = dev->vbi_out_cc[0][0]; vbi_gen->data[0].data[1] = dev->vbi_out_cc[0][1]; } else { vbi_gen->data[0].id = 0; } if (dev->vbi_out_have_cc[1]) { vbi_gen->data[1].data[0] = dev->vbi_out_cc[1][0]; vbi_gen->data[1].data[1] = dev->vbi_out_cc[1][1]; } else { vbi_gen->data[1].id = 0; } } } static void vivid_g_fmt_vbi_cap(struct vivid_dev *dev, struct v4l2_vbi_format *vbi) { bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; vbi->sampling_rate = 27000000; vbi->offset = 24; vbi->samples_per_line = 1440; vbi->sample_format = V4L2_PIX_FMT_GREY; vbi->start[0] = is_60hz ? V4L2_VBI_ITU_525_F1_START + 9 : V4L2_VBI_ITU_625_F1_START + 5; vbi->start[1] = is_60hz ? V4L2_VBI_ITU_525_F2_START + 9 : V4L2_VBI_ITU_625_F2_START + 5; vbi->count[0] = vbi->count[1] = is_60hz ? 12 : 18; vbi->flags = dev->vbi_cap_interlaced ? V4L2_VBI_INTERLACED : 0; vbi->reserved[0] = 0; vbi->reserved[1] = 0; } void vivid_raw_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_vbi_format vbi; u8 *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); vivid_g_fmt_vbi_cap(dev, &vbi); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0x10, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) vivid_vbi_gen_raw(&dev->vbi_gen, &vbi, vbuf); } void vivid_sliced_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_sliced_vbi_data *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) { unsigned i; for (i = 0; i < 25; i++) vbuf[i] = dev->vbi_gen.data[i]; } } static int vbi_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vq->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; if (*nplanes) return sizes[0] < size ? -EINVAL : 0; sizes[0] = size; *nplanes = 1; return 0; } static int vbi_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vb->vb2_queue->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); dprintk(dev, 1, "%s\n", __func__); if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. */ dev->buf_prepare_error = false; return -EINVAL; } if (vb2_plane_size(vb, 0) < size) { dprintk(dev, 1, "%s data will not fit into plane (%lu < %u)\n", __func__, vb2_plane_size(vb, 0), size); return -EINVAL; } vb2_set_plane_payload(vb, 0, size); return 0; } static void vbi_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vbi_cap_active); spin_unlock(&dev->slock); } static int vbi_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); int err; dprintk(dev, 1, "%s\n", __func__); dev->vbi_cap_seq_count = 0; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_cap(dev, &dev->vbi_cap_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vbi_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vbi_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_cap(dev, &dev->vbi_cap_streaming); } static void vbi_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vbi_cap); } const struct vb2_ops vivid_vbi_cap_qops = { .queue_setup = vbi_cap_queue_setup, .buf_prepare = vbi_cap_buf_prepare, .buf_queue = vbi_cap_buf_queue, .start_streaming = vbi_cap_start_streaming, .stop_streaming = vbi_cap_stop_streaming, .buf_request_complete = vbi_cap_buf_request_complete, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; int vidioc_g_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_vbi_format *vbi = &f->fmt.vbi; if (!vivid_is_sdtv_cap(dev) || !dev->has_raw_vbi_cap) return -EINVAL; vivid_g_fmt_vbi_cap(dev, vbi); return 0; } int vidioc_s_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); int ret = vidioc_g_fmt_vbi_cap(file, priv, f); if (ret) return ret; if (f->type != V4L2_BUF_TYPE_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; return 0; } void vivid_fill_service_lines(struct v4l2_sliced_vbi_format *vbi, u32 service_set) { vbi->io_size = sizeof(struct v4l2_sliced_vbi_data) * 36; vbi->service_set = service_set; memset(vbi->service_lines, 0, sizeof(vbi->service_lines)); memset(vbi->reserved, 0, sizeof(vbi->reserved)); if (vbi->service_set == 0) return; if (vbi->service_set & V4L2_SLICED_CAPTION_525) { vbi->service_lines[0][21] = V4L2_SLICED_CAPTION_525; vbi->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } if (vbi->service_set & V4L2_SLICED_WSS_625) { unsigned i; for (i = 7; i <= 18; i++) vbi->service_lines[0][i] = vbi->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; vbi->service_lines[0][23] = V4L2_SLICED_WSS_625; } } int vidioc_g_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; vivid_fill_service_lines(vbi, dev->service_set_cap); return 0; } int vidioc_try_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; u32 service_set = vbi->service_set; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; service_set &= is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; vivid_fill_service_lines(vbi, service_set); return 0; } int vidioc_s_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; int ret = vidioc_try_fmt_sliced_vbi_cap(file, fh, fmt); if (ret) return ret; if (fmt->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; dev->service_set_cap = vbi->service_set; return 0; } int vidioc_g_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_sliced_vbi_cap *cap) { struct vivid_dev *dev = video_drvdata(file); struct video_device *vdev = video_devdata(file); bool is_60hz; if (vdev->vfl_dir == VFL_DIR_RX) { is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap || cap->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE) return -EINVAL; } else { is_60hz = dev->std_out & V4L2_STD_525_60; if (!vivid_is_svid_out(dev) || !dev->has_sliced_vbi_out || cap->type != V4L2_BUF_TYPE_SLICED_VBI_OUTPUT) return -EINVAL; } cap->service_set = is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; if (is_60hz) { cap->service_lines[0][21] = V4L2_SLICED_CAPTION_525; cap->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } else { unsigned i; for (i = 7; i <= 18; i++) cap->service_lines[0][i] = cap->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; cap->service_lines[0][23] = V4L2_SLICED_WSS_625; } return 0; }
25 7 8 11 78 78 14 14 14 14 9 5 14 14 14 1 13 30 30 1 29 25 26 14 12 2 3 12 2 10 29 29 29 2 26 49 49 2 47 40 7 40 12 28 27 27 27 6 6 3 3 2 55 2 123 3 35 1 1 1 309 310 22 231 309 13 271 65 10 220 230 227 2 1 230 217 189 4 4 4 1 1 38 38 38 32 6 32 32 1 6 4 1 5 4 28 27 27 27 4 4 2 2 2 308 308 276 276 20 229 45 13 250 53 217 218 152 151 217 43 43 15 78 78 78 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module * over the Netfilter framework. IPVS can be used to build a * high-performance and highly available server based on a * cluster of servers. * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * Julian Anastasov <ja@ssi.bg> * * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms * and others. * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs * Harald Welte don't use nfcache */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/sctp.h> #include <linux/icmp.h> #include <linux/slab.h> #include <net/ip.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/gue.h> #include <net/gre.h> #include <net/route.h> #include <net/ip6_checksum.h> #include <net/netns/generic.h> /* net_generic() */ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <linux/netfilter_ipv6.h> #include <net/ip6_route.h> #endif #include <net/ip_vs.h> #include <linux/indirect_call_wrapper.h> EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); EXPORT_SYMBOL(ip_vs_conn_out_get); #ifdef CONFIG_IP_VS_PROTO_TCP EXPORT_SYMBOL(ip_vs_tcp_conn_listen); #endif EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); #if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP) #define SNAT_CALL(f, ...) \ INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__) #elif defined(CONFIG_IP_VS_PROTO_TCP) #define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__) #elif defined(CONFIG_IP_VS_PROTO_UDP) #define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__) #else #define SNAT_CALL(f, ...) f(__VA_ARGS__) #endif static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) #define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) const char *ip_vs_proto_name(unsigned int proto) { static char buf[20]; switch (proto) { case IPPROTO_IP: return "IP"; case IPPROTO_UDP: return "UDP"; case IPPROTO_TCP: return "TCP"; case IPPROTO_SCTP: return "SCTP"; case IPPROTO_ICMP: return "ICMP"; #ifdef CONFIG_IP_VS_IPV6 case IPPROTO_ICMPV6: return "ICMPv6"; #endif default: sprintf(buf, "IP_%u", proto); return buf; } } void ip_vs_init_hash_table(struct list_head *table, int rows) { while (--rows >= 0) INIT_LIST_HEAD(&table[rows]); } static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; local_bh_disable(); s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.inpkts); u64_stats_add(&s->cnt.inbytes, skb->len); u64_stats_update_end(&s->syncp); local_bh_enable(); } } static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; local_bh_disable(); s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.outpkts); u64_stats_add(&s->cnt.outbytes, skb->len); u64_stats_update_end(&s->syncp); local_bh_enable(); } } static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; local_bh_disable(); s = this_cpu_ptr(cp->dest->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); s = this_cpu_ptr(ipvs->tot_stats->s.cpustats); u64_stats_update_begin(&s->syncp); u64_stats_inc(&s->cnt.conns); u64_stats_update_end(&s->syncp); local_bh_enable(); } static inline void ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (likely(pd->pp->state_transition)) pd->pp->state_transition(cp, direction, skb, pd); } static inline int ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, struct sk_buff *skb, int protocol, const union nf_inet_addr *caddr, __be16 cport, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); return 0; } /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, * or selects a server and creates a connection entry plus a template. * Locking: we are svc user (svc->refcnt), so we hold all dests too * Protocols supported: TCP, UDP */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, struct sk_buff *skb, __be16 src_port, __be16 dst_port, int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp = NULL; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport = 0; /* destination port to forward */ unsigned int flags; struct ip_vs_conn_param param; const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ const union nf_inet_addr *src_addr, *dst_addr; if (likely(!ip_vs_iph_inverse(iph))) { src_addr = &iph->saddr; dst_addr = &iph->daddr; } else { src_addr = &iph->daddr; dst_addr = &iph->saddr; } /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* * As far as we know, FTP is a very complicated network protocol, and * it uses control connection and data connections. For active FTP, * FTP server initialize data connection to the client, its source port * is often 20. For passive FTP, FTP server tells the clients the port * that it passively listens to, and the client issues the data * connection. In the tunneling or direct routing mode, the load * balancer is on the client-to-server half of connection, the port * number is unknown to the load balancer. So, a conn template like * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> * is created for other persistent services. */ { int protocol = iph->protocol; const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { /* non-FTP template: * <protocol, caddr, 0, vaddr, vport, daddr, dport> * FTP template: * <protocol, caddr, 0, vaddr, 0, daddr, 0> */ if (svc->port != FTPPORT) vport = dst_port; } else { /* Note: persistent fwmark-based services and * persistent port zero service are handled here. * fwmark template: * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: * <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) { protocol = IPPROTO_IP; vaddr = &fwmark; } } /* return *ignored = -1 so NF_DROP can be used */ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, vaddr, vport, &param) < 0) { *ignored = -1; return NULL; } } /* Check if a template already exists */ ct = ip_vs_ct_in_get(&param); if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* * No template found or the dest of the connection * template is not available. * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); if (sched) { /* read svc->sched_data after svc->scheduler */ smp_rmb(); dest = sched->schedule(svc, skb, iph); } else { dest = NULL; } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; return NULL; } if (dst_port == svc->port && svc->port != FTPPORT) dport = dest->port; /* Create a template * This adds param.pe_data to the template, * and thus param.pe_data will be destroyed * when the template expires */ ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; return NULL; } ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; kfree(param.pe_data); } dport = dst_port; if (dport == svc->port && dest->port) dport = dest->port; flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a new connection according to the template */ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr, src_port, dst_addr, dst_port, &param); cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); *ignored = -1; return NULL; } /* * Add its control */ ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); ip_vs_conn_stats(cp, svc); return cp; } /* * IPVS main scheduling function * It selects a server according to the virtual service, and * creates a connection entry. * Protocols supported: TCP, UDP * * Usage of *ignored * * 1 : protocol tried to schedule (eg. on SYN), found svc but the * svc/scheduler decides that this packet should be accepted with * NF_ACCEPT because it must not be scheduled. * * 0 : scheduler can not find destination, so try bypass or * return ICMP and then NF_DROP (ip_vs_leave). * * -1 : scheduler tried to schedule but fatal error occurred, eg. * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param * failure such as missing Call-ID, ENOMEM on skb_linearize * or pe_data. In this case we should return NF_DROP without * any attempts to send ICMP with ip_vs_leave. */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; __be16 _ports[2], *pptr, cport, vport; const void *caddr, *vaddr; unsigned int flags; *ignored = 1; /* * IPv6 frags, only the first hit here. */ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (likely(!ip_vs_iph_inverse(iph))) { cport = pptr[0]; caddr = &iph->saddr; vport = pptr[1]; vaddr = &iph->daddr; } else { cport = pptr[1]; caddr = &iph->daddr; vport = pptr[0]; vaddr = &iph->saddr; } /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ if (cport == FTPDATA) { IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } /* * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { iph->hdr_flags ^= IP_VS_HDR_INVERSE; cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, svc->ipvs, svc->af, skb, iph); iph->hdr_flags ^= IP_VS_HDR_INVERSE; if (cp) { IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling reply for existing" " connection"); __ip_vs_conn_put(cp); return NULL; } } /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; /* * Non-persistent service */ if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " "check your ipvs configuration\n"); return NULL; } sched = rcu_dereference(svc->scheduler); if (sched) { /* read svc->sched_data after svc->scheduler */ smp_rmb(); dest = sched->schedule(svc, skb, iph); } else { dest = NULL; } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a connection entry. */ { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, dest->port ? dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; return NULL; } } IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), cp->flags, refcount_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; } static inline int ip_vs_addr_is_unicast(struct net *net, int af, union nf_inet_addr *addr) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; #endif return (inet_addr_type(net, addr->ip) == RTN_UNICAST); } /* * Pass or drop the packet. * Called by ip_vs_in, when the virtual service is available but * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { __be16 _ports[2], *pptr, dport; struct netns_ipvs *ipvs = svc->ipvs; struct net *net = ipvs->net; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NF_DROP; dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ if (sysctl_cache_bypass(ipvs) && svc->fwmark && !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); if (!cp) return NF_DROP; } /* statistics */ ip_vs_in_stats(cp, skb); /* set state */ ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) atomic_inc(&cp->control->in_pkts); else atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } /* * When the virtual ftp service is presented, packets destined * for other services on the VIP may get here (except services * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; if (unlikely(ip_vs_iph_icmp(iph))) return NF_DROP; /* * Notify the client that the destination is unreachable, and * release the socket buffer. * Since it is in IP layer, the TCP socket is not actually * created, the TCP RST packet cannot be sent, instead that * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } #ifdef CONFIG_SYSCTL static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return ipvs->sysctl_snat_reroute; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return ipvs->sysctl_nat_icmp_send; } #else static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } #endif __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) { if (NF_INET_LOCAL_IN == hooknum) return IP_DEFRAG_VS_IN; if (NF_INET_FORWARD == hooknum) return IP_DEFRAG_VS_FWD; return IP_DEFRAG_VS_OUT; } static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs, struct sk_buff *skb, u_int32_t user) { int err; local_bh_disable(); err = ip_defrag(ipvs->net, skb, user); local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); return err; } static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, unsigned int hooknum) { if (!sysctl_snat_reroute(ipvs)) return 0; /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ if (NF_INET_LOCAL_IN == hooknum) return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1; return 0; } /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in */ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout) { struct iphdr *iph = ip_hdr(skb); unsigned int icmp_offset = iph->ihl*4; struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) + icmp_offset); struct iphdr *ciph = (struct iphdr *)(icmph + 1); if (inout) { iph->saddr = cp->vaddr.ip; ip_send_check(iph); ciph->daddr = cp->vaddr.ip; ip_send_check(ciph); } else { iph->daddr = cp->daddr.ip; ip_send_check(iph); ciph->saddr = cp->daddr.ip; ip_send_check(ciph); } /* the TCP/UDP/SCTP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || IPPROTO_SCTP == ciph->protocol) { __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; else ports[0] = cp->dport; } /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset); skb->ip_summed = CHECKSUM_UNNECESSARY; if (inout) IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered outgoing ICMP"); else IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered incoming ICMP"); } #ifdef CONFIG_IP_VS_IPV6 void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout) { struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int icmp_offset = 0; unsigned int offs = 0; /* header offset*/ int protocol; struct icmp6hdr *icmph; struct ipv6hdr *ciph; unsigned short fragoffs; ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL); icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset); offs = icmp_offset + sizeof(struct icmp6hdr); ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs); protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL); if (inout) { iph->saddr = cp->vaddr.in6; ciph->daddr = cp->vaddr.in6; } else { iph->daddr = cp->daddr.in6; ciph->saddr = cp->daddr.in6; } /* the TCP/UDP/SCTP port */ if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || IPPROTO_SCTP == protocol)) { __be16 *ports = (void *)(skb_network_header(skb) + offs); IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__, ntohs(inout ? ports[1] : ports[0]), ntohs(inout ? cp->vport : cp->dport)); if (inout) ports[1] = cp->vport; else ports[0] = cp->dport; } /* And finally the ICMP checksum */ icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, skb->len - icmp_offset, IPPROTO_ICMPV6, 0); skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset; skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum); skb->ip_summed = CHECKSUM_PARTIAL; if (inout) IP_VS_DBG_PKT(11, AF_INET6, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered outgoing ICMPv6"); else IP_VS_DBG_PKT(11, AF_INET6, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered incoming ICMPv6"); } #endif /* Handle relevant response ICMP messages - forward to the right * destination host. */ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl, unsigned int hooknum) { unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) goto after_nat; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n", IP_VS_DBG_ADDR(af, snet)); goto out; } if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || IPPROTO_SCTP == protocol) offset += 2 * sizeof(__u16); if (skb_ensure_writable(skb, offset)) goto out; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); else #endif ip_vs_nat_icmp(skb, pp, cp, 1); if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; after_nat: /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); verdict = NF_ACCEPT; out: __ip_vs_conn_put(cp); return verdict; } /* * Handle ICMP messages in the inside-to-outside direction (outgoing). * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. */ static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl; union nf_inet_addr snet; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %pI4->%pI4\n", ic->type, ntohs(icmp_id(ic)), &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } /* Fragment header that is before ICMP header tells us that: * it's not an error message since they can't be fragmented. */ if (ipvsh->flags & IP6_FH_F_FRAG) return NF_DROP; IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; snet.in6 = ciph.saddr.in6; offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif /* * Check if sctp chunc is ABORT chunk */ static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) { struct sctp_chunkhdr *sch, schunk; sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return 0; if (sch->type == SCTP_CID_ABORT) return 1; return 0; } static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph); if (th == NULL) return 0; return th->rst; } static inline bool is_new_conn(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { switch (iph->protocol) { case IPPROTO_TCP: { struct tcphdr _tcph, *th; th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) return false; return th->syn; } case IPPROTO_SCTP: { struct sctp_chunkhdr *sch, schunk; sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr), sizeof(schunk), &schunk); if (sch == NULL) return false; return sch->type == SCTP_CID_INIT; } default: return false; } } static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, int conn_reuse_mode) { /* Controlled (FTP DATA or persistence)? */ if (cp->control) return false; switch (cp->protocol) { case IPPROTO_TCP: return (cp->state == IP_VS_TCP_S_TIME_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || ((conn_reuse_mode & 2) && (cp->state == IP_VS_TCP_S_FIN_WAIT) && (cp->flags & IP_VS_CONN_F_NOOUTPUT)); case IPPROTO_SCTP: return cp->state == IP_VS_SCTP_S_CLOSED; default: return false; } } /* Generic function to create new connections for outgoing RS packets * * Pre-requisites for successful connection creation: * 1) Virtual Service is NOT fwmark based: * In fwmark-VS actual vaddr and vport are unknown to IPVS * 2) Real Server and Virtual Service were NOT configured without port: * This is to allow match of different VS to the same RS ip-addr */ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct sk_buff *skb, const struct ip_vs_iphdr *iph, __be16 dport, __be16 cport) { struct ip_vs_conn_param param; struct ip_vs_conn *ct = NULL, *cp = NULL; const union nf_inet_addr *vaddr, *daddr, *caddr; union nf_inet_addr snet; __be16 vport; unsigned int flags; vaddr = &svc->addr; vport = svc->port; daddr = &iph->saddr; caddr = &iph->daddr; /* check pre-requisites are satisfied */ if (svc->fwmark) return NULL; if (!vport || !dport) return NULL; /* for persistent service first create connection template */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) { /* apply netmask the same way ingress-side does */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &caddr->in6, (__force __u32)svc->netmask); else #endif snet.ip = caddr->ip & svc->netmask; /* fill params and create template if not existent */ if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, &snet, 0, vaddr, vport, &param) < 0) return NULL; ct = ip_vs_ct_in_get(&param); /* check if template exists and points to the same dest */ if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(&param, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { kfree(param.pe_data); return NULL; } ct->timeout = svc->timeout; } else { kfree(param.pe_data); } } /* connection flags */ flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* create connection */ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, caddr, cport, vaddr, vport, &param); cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0); if (!cp) { if (ct) ip_vs_conn_put(ct); return NULL; } if (ct) { ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); } ip_vs_conn_stats(cp, svc); /* return connection (will be used to handle outgoing packet) */ IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), cp->flags, refcount_read(&cp->refcnt)); return cp; } /* Handle outgoing packets which are considered requests initiated by * real servers, so that subsequent responses from external client can be * routed to the right real server. * Used also for outgoing responses in OPS mode. * * Connection management is handled by persistent-engine specific callback. */ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, struct netns_ipvs *ipvs, int af, struct sk_buff *skb, const struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_conn *cp = NULL; __be16 _ports[2], *pptr; if (hooknum == NF_INET_LOCAL_IN) return NULL; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports); if (!pptr) return NULL; dest = ip_vs_find_real_service(ipvs, af, iph->protocol, &iph->saddr, pptr[0]); if (dest) { struct ip_vs_service *svc; struct ip_vs_pe *pe; svc = rcu_dereference(dest->svc); if (svc) { pe = rcu_dereference(svc->pe); if (pe && pe->conn_out) cp = pe->conn_out(svc, dest, skb, iph, pptr[0], pptr[1]); } } return cp; } /* Handle response packets: rewrite addresses and send away... */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph, unsigned int hooknum) { struct ip_vs_protocol *pp = pd->pp; if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) goto after_nat; IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (skb_ensure_writable(skb, iph->len)) goto drop; /* mangle the packet */ if (pp->snat_handler && !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph)) goto drop; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ipv6_hdr(skb)->saddr = cp->vaddr.in6; else #endif { ip_hdr(skb)->saddr = cp->vaddr.ip; ip_send_check(ip_hdr(skb)); } /* * nf_iterate does not expect change in the skb->dst->dev. * It looks like it is not fatal to enable this code for hooks * where our handlers are at the end of the chain list and * when all next handlers use skb->dst->dev and not outdev. * It will definitely route properly the inout NAT traffic * when multiple paths are used. */ /* For policy routing, packets originating from this * machine itself may be routed differently to packets * passing through. We want this packet to be routed as * if it came from this machine itself. So re-compute * the routing information. */ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto drop; IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); after_nat: ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); return NF_ACCEPT; drop: ip_vs_conn_put(cp); kfree_skb(skb); return NF_STOLEN; } /* * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct netns_ipvs *ipvs = net_ipvs(state->net); unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int af = state->pf; struct sock *sk; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) return NF_ACCEPT; sk = skb_to_full_sk(skb); /* Bad... Do not break raw sockets */ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT && af == AF_INET)) { if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk)) return NF_ACCEPT; } if (unlikely(!skb_dst(skb))) return NF_ACCEPT; if (!ipvs->enable) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, false, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related, hooknum, &iph); if (related) return verdict; } } else #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum); if (related) return verdict; } pd = ip_vs_proto_data_get(ipvs, iph.protocol); if (unlikely(!pd)) return NF_ACCEPT; pp = pd->pp; /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; ip_vs_fill_iph_skb(AF_INET, skb, false, &iph); } /* * Check if the packet belongs to an existing entry */ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, ipvs, af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { /* Currently only for UDP: * connection oriented protocols typically use * ephemeral ports for outgoing connections, so * related incoming responses would not match any VS */ if (pp->protocol == IPPROTO_UDP) { cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); } } if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ if ((iph.protocol != IPPROTO_TCP && iph.protocol != IPPROTO_SCTP) || ((iph.protocol == IPPROTO_TCP && !is_tcp_reset(skb, iph.len)) || (iph.protocol == IPPROTO_SCTP && !is_sctp_abort(skb, iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (!skb->dev) skb->dev = ipvs->net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; } } } IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; if (!iph->fragoffs) { /* No (second) fragments need to enter here, as nf_defrag_ipv6 * replayed fragment zero will already have created the cp */ /* Schedule and create new connection entry into cpp */ if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph)) return 0; } if (unlikely(!*cpp)) { /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); /* Fragment couldn't be mapped to a conn entry */ if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); *verdict = NF_ACCEPT; return 0; } return 1; } /* Check the UDP tunnel and return its header length */ static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, unsigned int offset, __u16 af, const union nf_inet_addr *daddr, __u8 *proto) { struct udphdr _udph, *udph; struct ip_vs_dest *dest; udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); if (!udph) goto unk; offset += sizeof(struct udphdr); dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest); if (!dest) goto unk; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { struct guehdr _gueh, *gueh; gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh); if (!gueh) goto unk; if (gueh->control != 0 || gueh->version != 0) goto unk; /* Later we can support also IPPROTO_IPV6 */ if (gueh->proto_ctype != IPPROTO_IPIP) goto unk; *proto = gueh->proto_ctype; return sizeof(struct udphdr) + sizeof(struct guehdr) + (gueh->hlen << 2); } unk: return 0; } /* Check the GRE tunnel and return its header length */ static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, unsigned int offset, __u16 af, const union nf_inet_addr *daddr, __u8 *proto) { struct gre_base_hdr _greh, *greh; struct ip_vs_dest *dest; greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh); if (!greh) goto unk; dest = ip_vs_find_tunnel(ipvs, af, daddr, 0); if (!dest) goto unk; if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { IP_TUNNEL_DECLARE_FLAGS(flags); __be16 type; /* Only support version 0 and C (csum) */ if ((greh->flags & ~GRE_CSUM) != 0) goto unk; type = greh->protocol; /* Later we can support also IPPROTO_IPV6 */ if (type != htons(ETH_P_IP)) goto unk; *proto = IPPROTO_IPIP; gre_flags_to_tnl_flags(flags, greh->flags); return gre_calc_hlen(flags); } unk: return 0; } /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. */ static int ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, offset2, ihl, verdict; bool tunnel, new_cp = false; union nf_inet_addr *raddr; char *outer_proto = "IPIP"; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Incoming ICMP (%d,%d) %pI4->%pI4\n", ic->type, ntohs(icmp_id(ic)), &iph->saddr, &iph->daddr); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ raddr = (union nf_inet_addr *)&cih->daddr; /* Special case for errors for IPIP/UDP/GRE tunnel packets */ tunnel = false; if (cih->protocol == IPPROTO_IPIP) { struct ip_vs_dest *dest; if (unlikely(cih->frag_off & htons(IP_OFFSET))) return NF_ACCEPT; /* Error for our IPIP must arrive at LOCAL_IN */ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) return NF_ACCEPT; dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0); /* Only for known tunnel */ if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP) return NF_ACCEPT; offset += cih->ihl * 4; cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ tunnel = true; } else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */ cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */ /* Error for our tunnel must arrive at LOCAL_IN */ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) { __u8 iproto; int ulen; /* Non-first fragment has no UDP/GRE header */ if (unlikely(cih->frag_off & htons(IP_OFFSET))) return NF_ACCEPT; offset2 = offset + cih->ihl * 4; if (cih->protocol == IPPROTO_UDP) { ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, raddr, &iproto); outer_proto = "UDP"; } else { ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET, raddr, &iproto); outer_proto = "GRE"; } if (ulen > 0) { /* Skip IP and UDP/GRE tunnel headers */ offset = offset2 + ulen; /* Now we should be at the original IP header */ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih && cih->version == 4 && cih->ihl >= 5 && iproto == IPPROTO_IPIP) tunnel = true; else return NF_ACCEPT; } } pd = ip_vs_proto_data_get(ipvs, cih->protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking incoming ICMP for"); offset2 = offset; ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !tunnel, &ciph); offset = ciph.len; /* The embedded headers contain source and dest in reverse order. * For IPIP/UDP/GRE tunnel this is error for request, not for reply. */ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, AF_INET, skb, &ciph); if (!cp) { int v; if (tunnel || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) return v; new_cp = true; } verdict = NF_DROP; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Incoming ICMP: failed checksum from %pI4!\n", &iph->saddr); goto out; } if (tunnel) { __be32 info = ic->un.gateway; __u8 type = ic->type; __u8 code = ic->code; /* Update the MTU */ if (ic->type == ICMP_DEST_UNREACH && ic->code == ICMP_FRAG_NEEDED) { struct ip_vs_dest *dest = cp->dest; u32 mtu = ntohs(ic->un.frag.mtu); __be16 frag_off = cih->frag_off; /* Strip outer IP and ICMP, go to IPIP/UDP/GRE header */ if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL) goto ignore_tunnel; offset2 -= ihl + sizeof(_icmph); skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for %s %pI4->%pI4: mtu=%u\n", outer_proto, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0); /* Client uses PMTUD? */ if (!(frag_off & htons(IP_DF))) goto ignore_tunnel; /* Prefer the resulting PMTU */ if (dest) { struct ip_vs_dest_dst *dest_dst; dest_dst = rcu_dereference(dest->dest_dst); if (dest_dst) mtu = dst_mtu(dest_dst->dst_cache); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); info = htonl(mtu); } /* Strip outer IP, ICMP and IPIP/UDP/GRE, go to IP header of * original request. */ if (pskb_pull(skb, offset2) == NULL) goto ignore_tunnel; skb_reset_network_header(skb); IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, type, code, ntohl(info)); icmp_send(skb, type, code, info); /* ICMP can be shorter but anyways, account it */ ip_vs_out_stats(cp, skb); ignore_tunnel: consume_skb(skb); verdict = NF_STOLEN; goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol || IPPROTO_SCTP == cih->protocol) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: if (likely(!new_cp)) __ip_vs_conn_put(cp); else ip_vs_conn_put(cp); return verdict; } #ifdef CONFIG_IP_VS_IPV6 static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum, struct ip_vs_iphdr *iph) { struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, verdict; bool new_cp = false; *related = 1; ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } /* Fragment header that is before ICMP header tells us that: * it's not an error message since they can't be fragmented. */ if (iph->flags & IP6_FH_F_FRAG) return NF_DROP; IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); offset = iph->len + sizeof(_icmph); if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph)) return NF_ACCEPT; pd = ip_vs_proto_data_get(ipvs, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; /* Cannot handle fragmented embedded protocol */ if (ciph.fragoffs) return NF_ACCEPT; IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, "Checking incoming ICMPv6 for"); /* The embedded headers contain source and dest in reverse order * if not from localhost */ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, AF_INET6, skb, &ciph); if (!cp) { int v; if (!sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph)) return v; new_cp = true; } /* VS/TUN, VS/DR and LOCALNODE just let it go */ if ((hooknum == NF_INET_LOCAL_OUT) && (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { verdict = NF_ACCEPT; goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); /* Need to mangle contained IPv6 header in ICMPv6 packet */ offset = ciph.len; if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || IPPROTO_SCTP == ciph.protocol) offset += 2 * sizeof(__u16); /* Also mangle ports */ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); out: if (likely(!new_cp)) __ip_vs_conn_put(cp); else ip_vs_conn_put(cp); return verdict; } #endif /* * Check if it's for virtual services, look it up, * and send it on its way... */ static unsigned int ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct netns_ipvs *ipvs = net_ipvs(state->net); unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; struct sock *sk; int af = state->pf; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) return NF_ACCEPT; /* * Big tappo: * - remote client: only PACKET_HOST * - route: used for struct net when skb->dev is unset */ if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { ip_vs_fill_iph_skb(af, skb, false, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, false, &iph); /* Bad... Do not break raw sockets */ sk = skb_to_full_sk(skb); if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT && af == AF_INET)) { if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk)) return NF_ACCEPT; } #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related, hooknum, &iph); if (related) return verdict; } } else #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; int verdict = ip_vs_in_icmp(ipvs, skb, &related, hooknum); if (related) return verdict; } /* Protocol supported? */ pd = ip_vs_proto_data_get(ipvs, iph.protocol); if (unlikely(!pd)) { /* The only way we'll see this packet again is if it's * encapsulated, so mark it with ipvs_property=1 so we * skip it if we're ignoring tunneled packets */ if (sysctl_ignore_tunneled(ipvs)) skb->ipvs_property = 1; return NF_ACCEPT; } pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, af, skb, &iph); if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) { int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); bool old_ct = false, resched = false; if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && unlikely(!atomic_read(&cp->dest->weight))) { resched = true; old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); } else if (conn_reuse_mode && is_new_conn_expected(cp, conn_reuse_mode)) { old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!atomic_read(&cp->n_control)) { resched = true; } else { /* Do not reschedule controlling connection * that uses conntrack while it is still * referenced by controlled connection(s). */ resched = !old_ct; } } if (resched) { if (!old_ct) cp->flags &= ~IP_VS_CONN_F_NFCT; if (!atomic_read(&cp->n_control)) ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); if (old_ct) return NF_DROP; cp = NULL; } } /* Check the server status */ if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ if (sysctl_expire_nodest_conn(ipvs)) { bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!old_ct) cp->flags &= ~IP_VS_CONN_F_NFCT; ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); if (old_ct) return NF_DROP; cp = NULL; } else { __ip_vs_conn_put(cp); return NF_DROP; } } if (unlikely(!cp)) { int v; if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) return v; } IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp, &iph); /* do not touch skb anymore */ else { IP_VS_DBG_RL("warning: packet_xmit is null"); ret = NF_ACCEPT; } /* Increase its packet counter and check if it is needed * to be synchronized * * Sync connection if it is about to close to * encorage the standby servers to update the connections timeout * * For ONE_PKT let ip_vs_sync_conn() do the filter work. */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) pkts = sysctl_sync_threshold(ipvs); else pkts = atomic_inc_return(&cp->in_pkts); if (ipvs->sync_state & IP_VS_STATE_MASTER) ip_vs_sync_conn(ipvs, cp, pkts); else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control) /* increment is done inside ip_vs_sync_conn too */ atomic_inc(&cp->control->in_pkts); ip_vs_conn_put(cp); return ret; } /* * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. * When fwmark-based virtual service is used, such as transparent * cache cluster, TCP packets can be marked and routed to ip_vs_in, * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain * and send them to ip_vs_in_icmp. */ static unsigned int ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct netns_ipvs *ipvs = net_ipvs(state->net); int r; /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; if (state->pf == NFPROTO_IPV4) { if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; #ifdef CONFIG_IP_VS_IPV6 } else { struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); #endif } return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 99, }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, }, }; #ifdef CONFIG_IP_VS_IPV6 static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, }, }; #endif int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af) { const struct nf_hook_ops *ops; unsigned int count; unsigned int afmask; int ret = 0; if (af == AF_INET6) { #ifdef CONFIG_IP_VS_IPV6 ops = ip_vs_ops6; count = ARRAY_SIZE(ip_vs_ops6); afmask = 2; #else return -EINVAL; #endif } else { ops = ip_vs_ops4; count = ARRAY_SIZE(ip_vs_ops4); afmask = 1; } if (!(ipvs->hooks_afmask & afmask)) { ret = nf_register_net_hooks(ipvs->net, ops, count); if (ret >= 0) ipvs->hooks_afmask |= afmask; } return ret; } void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af) { const struct nf_hook_ops *ops; unsigned int count; unsigned int afmask; if (af == AF_INET6) { #ifdef CONFIG_IP_VS_IPV6 ops = ip_vs_ops6; count = ARRAY_SIZE(ip_vs_ops6); afmask = 2; #else return; #endif } else { ops = ip_vs_ops4; count = ARRAY_SIZE(ip_vs_ops4); afmask = 1; } if (ipvs->hooks_afmask & afmask) { nf_unregister_net_hooks(ipvs->net, ops, count); ipvs->hooks_afmask &= ~afmask; } } /* * Initialize IP Virtual Server netns mem. */ static int __net_init __ip_vs_init(struct net *net) { struct netns_ipvs *ipvs; ipvs = net_generic(net, ip_vs_net_id); if (ipvs == NULL) return -ENOMEM; /* Hold the beast until a service is registered */ ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; if (ip_vs_estimator_net_init(ipvs) < 0) goto estimator_fail; if (ip_vs_control_net_init(ipvs) < 0) goto control_fail; if (ip_vs_protocol_net_init(ipvs) < 0) goto protocol_fail; if (ip_vs_app_net_init(ipvs) < 0) goto app_fail; if (ip_vs_conn_net_init(ipvs) < 0) goto conn_fail; if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; return 0; /* * Error handling */ sync_fail: ip_vs_conn_net_cleanup(ipvs); conn_fail: ip_vs_app_net_cleanup(ipvs); app_fail: ip_vs_protocol_net_cleanup(ipvs); protocol_fail: ip_vs_control_net_cleanup(ipvs); control_fail: ip_vs_estimator_net_cleanup(ipvs); estimator_fail: net->ipvs = NULL; return -ENOMEM; } static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list) { struct netns_ipvs *ipvs; struct net *net; ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */ list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); ip_vs_protocol_net_cleanup(ipvs); ip_vs_control_net_cleanup(ipvs); ip_vs_estimator_net_cleanup(ipvs); IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); net->ipvs = NULL; } } static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) { struct netns_ipvs *ipvs; struct net *net; list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); ip_vs_unregister_hooks(ipvs, AF_INET); ip_vs_unregister_hooks(ipvs, AF_INET6); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs); } } static struct pernet_operations ipvs_core_ops = { .init = __ip_vs_init, .exit_batch = __ip_vs_cleanup_batch, .id = &ip_vs_net_id, .size = sizeof(struct netns_ipvs), }; static struct pernet_operations ipvs_core_dev_ops = { .exit_batch = __ip_vs_dev_cleanup_batch, }; /* * Initialize IP Virtual Server */ static int __init ip_vs_init(void) { int ret; ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); goto exit; } ip_vs_protocol_init(); ret = ip_vs_conn_init(); if (ret < 0) { pr_err("can't setup connection table.\n"); goto cleanup_protocol; } ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ if (ret < 0) goto cleanup_conn; ret = register_pernet_device(&ipvs_core_dev_ops); if (ret < 0) goto cleanup_sub; ret = ip_vs_register_nl_ioctl(); if (ret < 0) { pr_err("can't register netlink/ioctl.\n"); goto cleanup_dev; } pr_info("ipvs loaded.\n"); return ret; cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: unregister_pernet_subsys(&ipvs_core_ops); cleanup_conn: ip_vs_conn_cleanup(); cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); exit: return ret; } static void __exit ip_vs_cleanup(void) { ip_vs_unregister_nl_ioctl(); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_conn_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); /* common rcu_barrier() used by: * - ip_vs_control_cleanup() */ rcu_barrier(); pr_info("ipvs unloaded.\n"); } module_init(ip_vs_init); module_exit(ip_vs_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Virtual Server");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0 */ /* * BlueZ - Bluetooth protocol stack for Linux * * Copyright (C) 2021 Intel Corporation */ #include <asm/unaligned.h> void eir_create(struct hci_dev *hdev, u8 *data); u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len); u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len); u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data, u8 data_len); static inline u16 eir_precalc_len(u8 data_len) { return sizeof(u8) * 2 + data_len; } static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { eir[eir_len++] = sizeof(type) + data_len; eir[eir_len++] = type; memcpy(&eir[eir_len], data, data_len); eir_len += data_len; return eir_len; } static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) { eir[eir_len++] = sizeof(type) + sizeof(data); eir[eir_len++] = type; put_unaligned_le16(data, &eir[eir_len]); eir_len += sizeof(data); return eir_len; } static inline u16 eir_skb_put_data(struct sk_buff *skb, u8 type, u8 *data, u8 data_len) { u8 *eir; u16 eir_len; eir_len = eir_precalc_len(data_len); eir = skb_put(skb, eir_len); WARN_ON(sizeof(type) + data_len > U8_MAX); eir[0] = sizeof(type) + data_len; eir[1] = type; memcpy(&eir[2], data, data_len); return eir_len; } static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, size_t *data_len) { size_t parsed = 0; if (eir_len < 2) return NULL; while (parsed < eir_len - 1) { u8 field_len = eir[0]; if (field_len == 0) break; parsed += field_len + 1; if (parsed > eir_len) break; if (eir[1] != type) { eir += field_len + 1; continue; } /* Zero length data */ if (field_len == 1) return NULL; if (data_len) *data_len = field_len - 1; return &eir[2]; } return NULL; } void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len);
115 68 2 113 3 114 110 47 95 96 94 21 50 95 46 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 /* mpicoder.c - Coder for the external representation of MPIs * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. * * GnuPG is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * GnuPG is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include <linux/bitops.h> #include <linux/count_zeros.h> #include <linux/byteorder/generic.h> #include <linux/scatterlist.h> #include <linux/string.h> #include "mpi-internal.h" #define MAX_EXTERN_SCAN_BYTES (16*1024*1024) #define MAX_EXTERN_MPI_BITS 16384 /** * mpi_read_raw_data - Read a raw byte stream as a positive integer * @xbuffer: The data to read * @nbytes: The amount of data to read */ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) { const uint8_t *buffer = xbuffer; int i, j; unsigned nbits, nlimbs; mpi_limb_t a; MPI val = NULL; while (nbytes > 0 && buffer[0] == 0) { buffer++; nbytes--; } nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes > 0) { i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; for (j = nlimbs; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { a <<= 8; a |= *buffer++; } i = 0; val->d[j - 1] = a; } } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_data); MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; unsigned int nbits, nbytes; MPI val; if (*ret_nread < 2) return ERR_PTR(-EINVAL); nbits = buffer[0] << 8 | buffer[1]; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return ERR_PTR(-EINVAL); } nbytes = DIV_ROUND_UP(nbits, 8); if (nbytes + 2 > *ret_nread) { pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n", nbytes, *ret_nread); return ERR_PTR(-EINVAL); } val = mpi_read_raw_data(buffer + 2, nbytes); if (!val) return ERR_PTR(-ENOMEM); *ret_nread = nbytes + 2; return val; } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); /**************** * Fill the mpi VAL from the hex string in STR. */ int mpi_fromstr(MPI val, const char *str) { int sign = 0; int prepend_zero = 0; int i, j, c, c1, c2; unsigned int nbits, nbytes, nlimbs; mpi_limb_t a; if (*str == '-') { sign = 1; str++; } /* Skip optional hex prefix. */ if (*str == '0' && str[1] == 'x') str += 2; nbits = strlen(str); if (nbits > MAX_EXTERN_SCAN_BYTES) { mpi_clear(val); return -EINVAL; } nbits *= 4; if ((nbits % 8)) prepend_zero = 1; nbytes = (nbits+7) / 8; nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; if (val->alloced < nlimbs) mpi_resize(val, nlimbs); i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB); i %= BYTES_PER_MPI_LIMB; j = val->nlimbs = nlimbs; val->sign = sign; for (; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { if (prepend_zero) { c1 = '0'; prepend_zero = 0; } else c1 = *str++; if (!c1) { mpi_clear(val); return -EINVAL; } c2 = *str++; if (!c2) { mpi_clear(val); return -EINVAL; } if (c1 >= '0' && c1 <= '9') c = c1 - '0'; else if (c1 >= 'a' && c1 <= 'f') c = c1 - 'a' + 10; else if (c1 >= 'A' && c1 <= 'F') c = c1 - 'A' + 10; else { mpi_clear(val); return -EINVAL; } c <<= 4; if (c2 >= '0' && c2 <= '9') c |= c2 - '0'; else if (c2 >= 'a' && c2 <= 'f') c |= c2 - 'a' + 10; else if (c2 >= 'A' && c2 <= 'F') c |= c2 - 'A' + 10; else { mpi_clear(val); return -EINVAL; } a <<= 8; a |= c; } i = 0; val->d[j-1] = a; } return 0; } EXPORT_SYMBOL_GPL(mpi_fromstr); MPI mpi_scanval(const char *string) { MPI a; a = mpi_alloc(0); if (!a) return NULL; if (mpi_fromstr(a, string)) { mpi_free(a); return NULL; } mpi_normalize(a); return a; } EXPORT_SYMBOL_GPL(mpi_scanval); static int count_lzeros(MPI a) { mpi_limb_t alimb; int i, lzeros = 0; for (i = a->nlimbs - 1; i >= 0; i--) { alimb = a->d[i]; if (alimb == 0) { lzeros += sizeof(mpi_limb_t); } else { lzeros += count_leading_zeros(alimb) / 8; break; } } return lzeros; } /** * mpi_read_buffer() - read MPI to a buffer provided by user (msb first) * * @a: a multi precision integer * @buf: buffer to which the output will be written to. Needs to be at * least mpi_get_size(a) long. * @buf_len: size of the buf. * @nbytes: receives the actual length of the data written on success and * the data to-be-written on -EOVERFLOW in case buf_len was too * small. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign) { uint8_t *p; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); int i, lzeros; if (!buf || !nbytes) return -EINVAL; if (sign) *sign = a->sign; lzeros = count_lzeros(a); if (buf_len < n - lzeros) { *nbytes = n - lzeros; return -EOVERFLOW; } p = buf; *nbytes = n - lzeros; for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, lzeros %= BYTES_PER_MPI_LIMB; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = cpu_to_be32(a->d[i]); #elif BYTES_PER_MPI_LIMB == 8 alimb = cpu_to_be64(a->d[i]); #else #error please implement for this limb size. #endif memcpy(p, (u8 *)&alimb + lzeros, BYTES_PER_MPI_LIMB - lzeros); p += BYTES_PER_MPI_LIMB - lzeros; lzeros = 0; } return 0; } EXPORT_SYMBOL_GPL(mpi_read_buffer); /* * mpi_get_buffer() - Returns an allocated buffer with the MPI (msb first). * Caller must free the return string. * This function does return a 0 byte buffer with nbytes set to zero if the * value of A is zero. * * @a: a multi precision integer. * @nbytes: receives the length of this buffer. * @sign: if not NULL, it will be set to the sign of the a. * * Return: Pointer to MPI buffer or NULL on error */ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) { uint8_t *buf; unsigned int n; int ret; if (!nbytes) return NULL; n = mpi_get_size(a); if (!n) n++; buf = kmalloc(n, GFP_KERNEL); if (!buf) return NULL; ret = mpi_read_buffer(a, buf, n, nbytes, sign); if (ret) { kfree(buf); return NULL; } return buf; } EXPORT_SYMBOL_GPL(mpi_get_buffer); /** * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first) * * This function works in the same way as the mpi_read_buffer, but it * takes an sgl instead of u8 * buf. * * @a: a multi precision integer * @sgl: scatterlist to write to. Needs to be at least * mpi_get_size(a) long. * @nbytes: the number of bytes to write. Leading bytes will be * filled with zero. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, int *sign) { u8 *p, *p2; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); struct sg_mapping_iter miter; int i, x, buf_len; int nents; if (sign) *sign = a->sign; if (nbytes < n) return -EOVERFLOW; nents = sg_nents_for_len(sgl, nbytes); if (nents < 0) return -EINVAL; sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; while (nbytes > n) { i = min_t(unsigned, nbytes - n, buf_len); memset(p2, 0, i); p2 += i; nbytes -= i; buf_len -= i; if (!buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } for (i = a->nlimbs - 1; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0; #elif BYTES_PER_MPI_LIMB == 8 alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0; #else #error please implement for this limb size. #endif p = (u8 *)&alimb; for (x = 0; x < sizeof(alimb); x++) { *p2++ = *p++; if (!--buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } } sg_miter_stop(&miter); return 0; } EXPORT_SYMBOL_GPL(mpi_write_to_sgl); /* * mpi_read_raw_from_sgl() - Function allocates an MPI and populates it with * data from the sgl * * This function works in the same way as the mpi_read_raw_data, but it * takes an sgl instead of void * buffer. i.e. it allocates * a new MPI and reads the content of the sgl to the MPI. * * @sgl: scatterlist to read from * @nbytes: number of bytes to read * * Return: Pointer to a new MPI or NULL on error */ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) { struct sg_mapping_iter miter; unsigned int nbits, nlimbs; int x, j, z, lzeros, ents; unsigned int len; const u8 *buff; mpi_limb_t a; MPI val = NULL; ents = sg_nents_for_len(sgl, nbytes); if (ents < 0) return NULL; sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); lzeros = 0; len = 0; while (nbytes > 0) { while (len && !*buff) { lzeros++; len--; buff++; } if (len && *buff) break; sg_miter_next(&miter); buff = miter.addr; len = miter.length; nbytes -= lzeros; lzeros = 0; } miter.consumed = lzeros; nbytes -= lzeros; nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { sg_miter_stop(&miter); pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); sg_miter_stop(&miter); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes == 0) return val; j = nlimbs - 1; a = 0; z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; z %= BYTES_PER_MPI_LIMB; while (sg_miter_next(&miter)) { buff = miter.addr; len = min_t(unsigned, miter.length, nbytes); nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; a |= *buff++; if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { val->d[j--] = a; a = 0; } } z += x; } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); /* Perform a two's complement operation on buffer P of size N bytes. */ static void twocompl(unsigned char *p, unsigned int n) { int i; for (i = n-1; i >= 0 && !p[i]; i--) ; if (i >= 0) { if ((p[i] & 0x01)) p[i] = (((p[i] ^ 0xfe) | 0x01) & 0xff); else if ((p[i] & 0x02)) p[i] = (((p[i] ^ 0xfc) | 0x02) & 0xfe); else if ((p[i] & 0x04)) p[i] = (((p[i] ^ 0xf8) | 0x04) & 0xfc); else if ((p[i] & 0x08)) p[i] = (((p[i] ^ 0xf0) | 0x08) & 0xf8); else if ((p[i] & 0x10)) p[i] = (((p[i] ^ 0xe0) | 0x10) & 0xf0); else if ((p[i] & 0x20)) p[i] = (((p[i] ^ 0xc0) | 0x20) & 0xe0); else if ((p[i] & 0x40)) p[i] = (((p[i] ^ 0x80) | 0x40) & 0xc0); else p[i] = 0x80; for (i--; i >= 0; i--) p[i] ^= 0xff; } } int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, size_t buflen, size_t *nwritten, MPI a) { unsigned int nbits = mpi_get_nbits(a); size_t len; size_t dummy_nwritten; int negative; if (!nwritten) nwritten = &dummy_nwritten; /* Libgcrypt does no always care to set clear the sign if the value * is 0. For printing this is a bit of a surprise, in particular * because if some of the formats don't support negative numbers but * should be able to print a zero. Thus we need this extra test * for a negative number. */ if (a->sign && mpi_cmp_ui(a, 0)) negative = 1; else negative = 0; len = buflen; *nwritten = 0; if (format == GCRYMPI_FMT_STD) { unsigned char *tmp; int extra = 0; unsigned int n; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (negative) { twocompl(tmp, n); if (!(*tmp & 0x80)) { /* Need to extend the sign. */ n++; extra = 2; } } else if (n && (*tmp & 0x80)) { /* Positive but the high bit of the returned buffer is set. * Thus we need to print an extra leading 0x00 so that the * output is interpreted as a positive number. */ n++; extra = 1; } if (buffer && n > len) { /* The provided buffer is too short. */ kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; if (extra == 1) *s++ = 0; else if (extra) *s++ = 0xff; memcpy(s, tmp, n-!!extra); } kfree(tmp); *nwritten = n; return 0; } else if (format == GCRYMPI_FMT_USG) { unsigned int n = (nbits + 7)/8; /* Note: We ignore the sign for this format. */ /* FIXME: for performance reasons we should put this into * mpi_aprint because we can then use the buffer directly. */ if (buffer && n > len) return -E2BIG; if (buffer) { unsigned char *tmp; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; memcpy(buffer, tmp, n); kfree(tmp); } *nwritten = n; return 0; } else if (format == GCRYMPI_FMT_PGP) { unsigned int n = (nbits + 7)/8; /* The PGP format can only handle unsigned integers. */ if (negative) return -EINVAL; if (buffer && n+2 > len) return -E2BIG; if (buffer) { unsigned char *tmp; unsigned char *s = buffer; s[0] = nbits >> 8; s[1] = nbits; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; memcpy(s+2, tmp, n); kfree(tmp); } *nwritten = n+2; return 0; } else if (format == GCRYMPI_FMT_SSH) { unsigned char *tmp; int extra = 0; unsigned int n; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (negative) { twocompl(tmp, n); if (!(*tmp & 0x80)) { /* Need to extend the sign. */ n++; extra = 2; } } else if (n && (*tmp & 0x80)) { n++; extra = 1; } if (buffer && n+4 > len) { kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; *s++ = n >> 24; *s++ = n >> 16; *s++ = n >> 8; *s++ = n; if (extra == 1) *s++ = 0; else if (extra) *s++ = 0xff; memcpy(s, tmp, n-!!extra); } kfree(tmp); *nwritten = 4+n; return 0; } else if (format == GCRYMPI_FMT_HEX) { unsigned char *tmp; int i; int extra = 0; unsigned int n = 0; tmp = mpi_get_buffer(a, &n, NULL); if (!tmp) return -EINVAL; if (!n || (*tmp & 0x80)) extra = 2; if (buffer && 2*n + extra + negative + 1 > len) { kfree(tmp); return -E2BIG; } if (buffer) { unsigned char *s = buffer; if (negative) *s++ = '-'; if (extra) { *s++ = '0'; *s++ = '0'; } for (i = 0; i < n; i++) { unsigned int c = tmp[i]; *s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10; c &= 15; *s++ = c < 10 ? '0'+c : 'A'+c-10; } *s++ = 0; *nwritten = s - buffer; } else { *nwritten = 2*n + extra + negative + 1; } kfree(tmp); return 0; } else return -EINVAL; } EXPORT_SYMBOL_GPL(mpi_print);
77 77 76 77 413 1 3 2 353 64 2 2 1 63 39 35 8 13 56 6 65 5 4 55 56 11 17 10 7 30 6 11 11 18 3 2 39 417 405 11 4 17 15 2 17 13 5 13 5 1 12 11 17 11 6 7 12 3 11 9 8 434 12 2 1 417 4 409 2 13 18 31 29 408 50 8 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/plist.h> #include <linux/sched/task.h> #include <linux/sched/signal.h> #include <linux/freezer.h> #include "futex.h" /* * READ this before attempting to hack on futexes! * * Basic futex operation and ordering guarantees * ============================================= * * The waiter reads the futex value in user space and calls * futex_wait(). This function computes the hash bucket and acquires * the hash bucket lock. After that it reads the futex user space value * again and verifies that the data has not changed. If it has not changed * it enqueues itself into the hash bucket, releases the hash bucket lock * and schedules. * * The waker side modifies the user space value of the futex and calls * futex_wake(). This function computes the hash bucket and acquires the * hash bucket lock. Then it looks for waiters on that futex in the hash * bucket and wakes them. * * In futex wake up scenarios where no tasks are blocked on a futex, taking * the hb spinlock can be avoided and simply return. In order for this * optimization to work, ordering guarantees must exist so that the waiter * being added to the list is acknowledged when the list is concurrently being * checked by the waker, avoiding scenarios like the following: * * CPU 0 CPU 1 * val = *futex; * sys_futex(WAIT, futex, val); * futex_wait(futex, val); * uval = *futex; * *futex = newval; * sys_futex(WAKE, futex); * futex_wake(futex); * if (queue_empty()) * return; * if (uval == val) * lock(hash_bucket(futex)); * queue(); * unlock(hash_bucket(futex)); * schedule(); * * This would cause the waiter on CPU 0 to wait forever because it * missed the transition of the user space value from val to newval * and the waker did not find the waiter in the hash bucket queue. * * The correct serialization ensures that a waiter either observes * the changed user space value before blocking or is woken by a * concurrent waker: * * CPU 0 CPU 1 * val = *futex; * sys_futex(WAIT, futex, val); * futex_wait(futex, val); * * waiters++; (a) * smp_mb(); (A) <-- paired with -. * | * lock(hash_bucket(futex)); | * | * uval = *futex; | * | *futex = newval; * | sys_futex(WAKE, futex); * | futex_wake(futex); * | * `--------> smp_mb(); (B) * if (uval == val) * queue(); * unlock(hash_bucket(futex)); * schedule(); if (waiters) * lock(hash_bucket(futex)); * else wake_waiters(futex); * waiters--; (b) unlock(hash_bucket(futex)); * * Where (A) orders the waiters increment and the futex value read through * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write * to futex and the waiters read (see futex_hb_waiters_pending()). * * This yields the following case (where X:=waiters, Y:=futex): * * X = Y = 0 * * w[X]=1 w[Y]=1 * MB MB * r[Y]=y r[X]=x * * Which guarantees that x==0 && y==0 is impossible; which translates back into * the guarantee that we cannot both miss the futex variable change and the * enqueue. * * Note that a new waiter is accounted for in (a) even when it is possible that * the wait call can return error, in which case we backtrack from it in (b). * Refer to the comment in futex_q_lock(). * * Similarly, in order to account for waiters being requeued on another * address we always increment the waiters for the destination bucket before * acquiring the lock. It then decrements them again after releasing it - * the code that actually moves the futex(es) between hash buckets (requeue_futex) * will do the additional required waiter count housekeeping. This is done for * double_lock_hb() and double_unlock_hb(), respectively. */ bool __futex_wake_mark(struct futex_q *q) { if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) return false; __futex_unqueue(q); /* * The waiting task can free the futex_q as soon as q->lock_ptr = NULL * is written, without taking any locks. This is possible in the event * of a spurious wakeup, for example. A memory barrier is required here * to prevent the following store to lock_ptr from getting ahead of the * plist_del in __futex_unqueue(). */ smp_store_release(&q->lock_ptr, NULL); return true; } /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. Callers * must ensure to later call wake_up_q() for the actual * wakeups to occur. */ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q) { struct task_struct *p = q->task; get_task_struct(p); if (!__futex_wake_mark(q)) { put_task_struct(p); return; } /* * Queue the task for later wakeup for after we've released * the hb->lock. */ wake_q_add_safe(wake_q, p); } /* * Wake up waiters matching bitset queued on this futex (uaddr). */ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; DEFINE_WAKE_Q(wake_q); int ret; if (!bitset) return -EINVAL; ret = get_futex_key(uaddr, flags, &key, FUTEX_READ); if (unlikely(ret != 0)) return ret; if ((flags & FLAGS_STRICT) && !nr_wake) return 0; hb = futex_hash(&key); /* Make sure we really have tasks to wakeup */ if (!futex_hb_waiters_pending(hb)) return ret; spin_lock(&hb->lock); plist_for_each_entry_safe(this, next, &hb->chain, list) { if (futex_match (&this->key, &key)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } /* Check if one of the bits is set in both bitsets */ if (!(this->bitset & bitset)) continue; this->wake(&wake_q, this); if (++ret >= nr_wake) break; } } spin_unlock(&hb->lock); wake_up_q(&wake_q); return ret; } static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { unsigned int op = (encoded_op & 0x70000000) >> 28; unsigned int cmp = (encoded_op & 0x0f000000) >> 24; int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); int oldval, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { if (oparg < 0 || oparg > 31) { char comm[sizeof(current->comm)]; /* * kill this print and return -EINVAL when userspace * is sane again */ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n", get_task_comm(comm, current), oparg); oparg &= 31; } oparg = 1 << oparg; } pagefault_disable(); ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); pagefault_enable(); if (ret) return ret; switch (cmp) { case FUTEX_OP_CMP_EQ: return oldval == cmparg; case FUTEX_OP_CMP_NE: return oldval != cmparg; case FUTEX_OP_CMP_LT: return oldval < cmparg; case FUTEX_OP_CMP_GE: return oldval >= cmparg; case FUTEX_OP_CMP_LE: return oldval <= cmparg; case FUTEX_OP_CMP_GT: return oldval > cmparg; default: return -ENOSYS; } } /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; int ret, op_ret; DEFINE_WAKE_Q(wake_q); retry: ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) return ret; hb1 = futex_hash(&key1); hb2 = futex_hash(&key2); retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { double_unlock_hb(hb1, hb2); if (!IS_ENABLED(CONFIG_MMU) || unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { /* * we don't get EFAULT from MMU faults if we don't have * an MMU, but we might get them from range checking */ ret = op_ret; return ret; } if (op_ret == -EFAULT) { ret = fault_in_user_writeable(uaddr2); if (ret) return ret; } cond_resched(); if (!(flags & FLAGS_SHARED)) goto retry_private; goto retry; } plist_for_each_entry_safe(this, next, &hb1->chain, list) { if (futex_match (&this->key, &key1)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; goto out_unlock; } this->wake(&wake_q, this); if (++ret >= nr_wake) break; } } if (op_ret > 0) { op_ret = 0; plist_for_each_entry_safe(this, next, &hb2->chain, list) { if (futex_match (&this->key, &key2)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; goto out_unlock; } this->wake(&wake_q, this); if (++op_ret >= nr_wake2) break; } } ret += op_ret; } out_unlock: double_unlock_hb(hb1, hb2); wake_up_q(&wake_q); return ret; } static long futex_wait_restart(struct restart_block *restart); /** * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal * @hb: the futex hash bucket, must be locked by the caller * @q: the futex_q to queue up on * @timeout: the prepared hrtimer_sleeper, or null for no timeout */ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, struct hrtimer_sleeper *timeout) { /* * The task state is guaranteed to be set before another task can * wake it. set_current_state() is implemented using smp_store_mb() and * futex_queue() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); futex_queue(q, hb); /* Arm the timer */ if (timeout) hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); /* * If we have been removed from the hash list, then another task * has tried to wake us, and we can skip the call to schedule(). */ if (likely(!plist_node_empty(&q->list))) { /* * If the timer has already expired, current will already be * flagged for rescheduling. Only call schedule if there * is no timeout, or if it has yet to expire. */ if (!timeout || timeout->task) schedule(); } __set_current_state(TASK_RUNNING); } /** * futex_unqueue_multiple - Remove various futexes from their hash bucket * @v: The list of futexes to unqueue * @count: Number of futexes in the list * * Helper to unqueue a list of futexes. This can't fail. * * Return: * - >=0 - Index of the last futex that was awoken; * - -1 - No futex was awoken */ int futex_unqueue_multiple(struct futex_vector *v, int count) { int ret = -1, i; for (i = 0; i < count; i++) { if (!futex_unqueue(&v[i].q)) ret = i; } return ret; } /** * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes * @vs: The futex list to wait on * @count: The size of the list * @woken: Index of the last woken futex, if any. Used to notify the * caller that it can return this index to userspace (return parameter) * * Prepare multiple futexes in a single step and enqueue them. This may fail if * the futex list is invalid or if any futex was already awoken. On success the * task is ready to interruptible sleep. * * Return: * - 1 - One of the futexes was woken by another thread * - 0 - Success * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL */ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) { struct futex_hash_bucket *hb; bool retry = false; int ret, i; u32 uval; /* * Enqueuing multiple futexes is tricky, because we need to enqueue * each futex on the list before dealing with the next one to avoid * deadlocking on the hash bucket. But, before enqueuing, we need to * make sure that current->state is TASK_INTERRUPTIBLE, so we don't * lose any wake events, which cannot be done before the get_futex_key * of the next key, because it calls get_user_pages, which can sleep. * Thus, we fetch the list of futexes keys in two steps, by first * pinning all the memory keys in the futex key, and only then we read * each key and queue the corresponding futex. * * Private futexes doesn't need to recalculate hash in retry, so skip * get_futex_key() when retrying. */ retry: for (i = 0; i < count; i++) { if (!(vs[i].w.flags & FLAGS_SHARED) && retry) continue; ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), vs[i].w.flags, &vs[i].q.key, FUTEX_READ); if (unlikely(ret)) return ret; } set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); for (i = 0; i < count; i++) { u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; struct futex_q *q = &vs[i].q; u32 val = vs[i].w.val; hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); if (!ret && uval == val) { /* * The bucket lock can't be held while dealing with the * next futex. Queue each futex at this moment so hb can * be unlocked. */ futex_queue(q, hb); continue; } futex_q_unlock(hb); __set_current_state(TASK_RUNNING); /* * Even if something went wrong, if we find out that a futex * was woken, we don't return error and return this index to * userspace */ *woken = futex_unqueue_multiple(vs, i); if (*woken >= 0) return 1; if (ret) { /* * If we need to handle a page fault, we need to do so * without any lock and any enqueued futex (otherwise * we could lose some wakeup). So we do it here, after * undoing all the work done so far. In success, we * retry all the work. */ if (get_user(uval, uaddr)) return -EFAULT; retry = true; goto retry; } if (uval != val) return -EWOULDBLOCK; } return 0; } /** * futex_sleep_multiple - Check sleeping conditions and sleep * @vs: List of futexes to wait for * @count: Length of vs * @to: Timeout * * Sleep if and only if the timeout hasn't expired and no futex on the list has * been woken up. */ static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to) { if (to && !to->task) return; for (; count; count--, vs++) { if (!READ_ONCE(vs->q.lock_ptr)) return; } schedule(); } /** * futex_wait_multiple - Prepare to wait on and enqueue several futexes * @vs: The list of futexes to wait on * @count: The number of objects * @to: Timeout before giving up and returning to userspace * * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function * sleeps on a group of futexes and returns on the first futex that is * wake, or after the timeout has elapsed. * * Return: * - >=0 - Hint to the futex that was awoken * - <0 - On error */ int futex_wait_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to) { int ret, hint = 0; if (to) hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); while (1) { ret = futex_wait_multiple_setup(vs, count, &hint); if (ret) { if (ret > 0) { /* A futex was woken during setup */ ret = hint; } return ret; } futex_sleep_multiple(vs, count, to); __set_current_state(TASK_RUNNING); ret = futex_unqueue_multiple(vs, count); if (ret >= 0) return ret; if (to && !to->task) return -ETIMEDOUT; else if (signal_pending(current)) return -ERESTARTSYS; /* * The final case is a spurious wakeup, for * which just retry. */ } } /** * futex_wait_setup() - Prepare to wait on a futex * @uaddr: the futex userspace address * @val: the expected value * @flags: futex flags (FLAGS_SHARED, etc.) * @q: the associated futex_q * @hb: storage for hash_bucket pointer to be returned to caller * * Setup the futex_q and locate the hash_bucket. Get the futex value and * compare it with the expected value. Handle atomic faults internally. * Return with the hb lock held on success, and unlocked on failure. * * Return: * - 0 - uaddr contains val and hb has been locked; * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked */ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) { u32 uval; int ret; /* * Access the page AFTER the hash-bucket is locked. * Order is important: * * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } * * The basic logical guarantee of a futex is that it blocks ONLY * if cond(var) is known to be true at the time of blocking, for * any cond. If we locked the hash-bucket after testing *uaddr, that * would open a race condition where we could block indefinitely with * cond(var) false, which would violate the guarantee. * * On the other hand, we insert q and release the hash-bucket only * after testing *uaddr. This guarantees that futex_wait() will NOT * absorb a wakeup if *uaddr does not match the desired values * while the syscall executes. */ retry: ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ); if (unlikely(ret != 0)) return ret; retry_private: *hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); if (ret) { futex_q_unlock(*hb); ret = get_user(uval, uaddr); if (ret) return ret; if (!(flags & FLAGS_SHARED)) goto retry_private; goto retry; } if (uval != val) { futex_q_unlock(*hb); ret = -EWOULDBLOCK; } return ret; } int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, struct hrtimer_sleeper *to, u32 bitset) { struct futex_q q = futex_q_init; struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; q.bitset = bitset; retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q * is initialized. */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ if (!futex_unqueue(&q)) return 0; if (to && !to->task) return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the * victim of a spurious wakeup as well. */ if (!signal_pending(current)) goto retry; return -ERESTARTSYS; } int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { struct hrtimer_sleeper timeout, *to; struct restart_block *restart; int ret; to = futex_setup_timer(abs_time, &timeout, flags, current->timer_slack_ns); ret = __futex_wait(uaddr, flags, val, to, bitset); /* No timeout, nothing to clean up. */ if (!to) return ret; hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); if (ret == -ERESTARTSYS) { restart = &current->restart_block; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; return set_restart_fn(restart, futex_wait_restart); } return ret; } static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = restart->futex.uaddr; ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { t = restart->futex.time; tp = &t; } restart->fn = do_no_restart_syscall; return (long)futex_wait(uaddr, restart->futex.flags, restart->futex.val, tp, restart->futex.bitset); }
10 3 3 1 4 7 3 3 1 11 11 7 3 14 3 3 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 // SPDX-License-Identifier: GPL-2.0-or-later /* * MOSCHIP MCS7830 based (7730/7830/7832) USB 2.0 Ethernet Devices * * based on usbnet.c, asix.c and the vendor provided mcs7830 driver * * Copyright (C) 2010 Andreas Mohr <andi@lisas.de> * Copyright (C) 2006 Arnd Bergmann <arnd@arndb.de> * Copyright (C) 2003-2005 David Hollis <dhollis@davehollis.com> * Copyright (C) 2005 Phil Chang <pchang23@sbcglobal.net> * Copyright (c) 2002-2003 TiVo Inc. * * Definitions gathered from MOSCHIP, Data Sheet_7830DA.pdf (thanks!). * * 2010-12-19: add 7832 USB PID ("functionality same as MCS7830"), * per active notification by manufacturer * * TODO: * - support HIF_REG_CONFIG_SLEEPMODE/HIF_REG_CONFIG_TXENABLE (via autopm?) * - implement ethtool_ops get_pauseparam/set_pauseparam * via HIF_REG_PAUSE_THRESHOLD (>= revision C only!) * - implement get_eeprom/[set_eeprom] * - switch PHY on/off on ifup/ifdown (perhaps in usbnet.c, via MII) * - mcs7830_get_regs() handling is weird: for rev 2 we return 32 regs, * can access only ~ 24, remaining user buffer is uninitialized garbage * - anything else? */ #include <linux/crc32.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/usbnet.h> /* requests */ #define MCS7830_RD_BMREQ (USB_DIR_IN | USB_TYPE_VENDOR | \ USB_RECIP_DEVICE) #define MCS7830_WR_BMREQ (USB_DIR_OUT | USB_TYPE_VENDOR | \ USB_RECIP_DEVICE) #define MCS7830_RD_BREQ 0x0E #define MCS7830_WR_BREQ 0x0D #define MCS7830_CTRL_TIMEOUT 1000 #define MCS7830_MAX_MCAST 64 #define MCS7830_VENDOR_ID 0x9710 #define MCS7832_PRODUCT_ID 0x7832 #define MCS7830_PRODUCT_ID 0x7830 #define MCS7730_PRODUCT_ID 0x7730 #define SITECOM_VENDOR_ID 0x0DF6 #define LN_030_PRODUCT_ID 0x0021 #define MCS7830_MII_ADVERTISE (ADVERTISE_PAUSE_CAP | ADVERTISE_100FULL | \ ADVERTISE_100HALF | ADVERTISE_10FULL | \ ADVERTISE_10HALF | ADVERTISE_CSMA) /* HIF_REG_XX corresponding index value */ enum { HIF_REG_MULTICAST_HASH = 0x00, HIF_REG_PACKET_GAP1 = 0x08, HIF_REG_PACKET_GAP2 = 0x09, HIF_REG_PHY_DATA = 0x0a, HIF_REG_PHY_CMD1 = 0x0c, HIF_REG_PHY_CMD1_READ = 0x40, HIF_REG_PHY_CMD1_WRITE = 0x20, HIF_REG_PHY_CMD1_PHYADDR = 0x01, HIF_REG_PHY_CMD2 = 0x0d, HIF_REG_PHY_CMD2_PEND_FLAG_BIT = 0x80, HIF_REG_PHY_CMD2_READY_FLAG_BIT = 0x40, HIF_REG_CONFIG = 0x0e, /* hmm, spec sez: "R/W", "Except bit 3" (likely TXENABLE). */ HIF_REG_CONFIG_CFG = 0x80, HIF_REG_CONFIG_SPEED100 = 0x40, HIF_REG_CONFIG_FULLDUPLEX_ENABLE = 0x20, HIF_REG_CONFIG_RXENABLE = 0x10, HIF_REG_CONFIG_TXENABLE = 0x08, HIF_REG_CONFIG_SLEEPMODE = 0x04, HIF_REG_CONFIG_ALLMULTICAST = 0x02, HIF_REG_CONFIG_PROMISCUOUS = 0x01, HIF_REG_ETHERNET_ADDR = 0x0f, HIF_REG_FRAME_DROP_COUNTER = 0x15, /* 0..ff; reset: 0 */ HIF_REG_PAUSE_THRESHOLD = 0x16, HIF_REG_PAUSE_THRESHOLD_DEFAULT = 0, }; /* Trailing status byte in Ethernet Rx frame */ enum { MCS7830_RX_SHORT_FRAME = 0x01, /* < 64 bytes */ MCS7830_RX_LENGTH_ERROR = 0x02, /* framelen != Ethernet length field */ MCS7830_RX_ALIGNMENT_ERROR = 0x04, /* non-even number of nibbles */ MCS7830_RX_CRC_ERROR = 0x08, MCS7830_RX_LARGE_FRAME = 0x10, /* > 1518 bytes */ MCS7830_RX_FRAME_CORRECT = 0x20, /* frame is correct */ /* [7:6] reserved */ }; struct mcs7830_data { u8 multi_filter[8]; u8 config; }; static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { int ret; ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, 0x0000, index, data, size); if (ret < 0) return ret; else if (ret < size) return -ENODATA; return ret; } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) { return usbnet_write_cmd(dev, MCS7830_WR_BREQ, MCS7830_WR_BMREQ, 0x0000, index, data, size); } static void mcs7830_set_reg_async(struct usbnet *dev, u16 index, u16 size, void *data) { usbnet_write_cmd_async(dev, MCS7830_WR_BREQ, MCS7830_WR_BMREQ, 0x0000, index, data, size); } static int mcs7830_hif_get_mac_address(struct usbnet *dev, unsigned char *addr) { int ret = mcs7830_get_reg(dev, HIF_REG_ETHERNET_ADDR, ETH_ALEN, addr); if (ret < 0) return ret; return 0; } static int mcs7830_hif_set_mac_address(struct usbnet *dev, const unsigned char *addr) { int ret = mcs7830_set_reg(dev, HIF_REG_ETHERNET_ADDR, ETH_ALEN, addr); if (ret < 0) return ret; return 0; } static int mcs7830_set_mac_address(struct net_device *netdev, void *p) { int ret; struct usbnet *dev = netdev_priv(netdev); struct sockaddr *addr = p; if (netif_running(netdev)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; ret = mcs7830_hif_set_mac_address(dev, addr->sa_data); if (ret < 0) return ret; /* it worked --> adopt it on netdev side */ eth_hw_addr_set(netdev, addr->sa_data); return 0; } static int mcs7830_read_phy(struct usbnet *dev, u8 index) { int ret; int i; __le16 val; u8 cmd[2] = { HIF_REG_PHY_CMD1_READ | HIF_REG_PHY_CMD1_PHYADDR, HIF_REG_PHY_CMD2_PEND_FLAG_BIT | index, }; mutex_lock(&dev->phy_mutex); /* write the MII command */ ret = mcs7830_set_reg(dev, HIF_REG_PHY_CMD1, 2, cmd); if (ret < 0) goto out; /* wait for the data to become valid, should be within < 1ms */ for (i = 0; i < 10; i++) { ret = mcs7830_get_reg(dev, HIF_REG_PHY_CMD1, 2, cmd); if ((ret < 0) || (cmd[1] & HIF_REG_PHY_CMD2_READY_FLAG_BIT)) break; ret = -EIO; msleep(1); } if (ret < 0) goto out; /* read actual register contents */ ret = mcs7830_get_reg(dev, HIF_REG_PHY_DATA, 2, &val); if (ret < 0) goto out; ret = le16_to_cpu(val); dev_dbg(&dev->udev->dev, "read PHY reg %02x: %04x (%d tries)\n", index, val, i); out: mutex_unlock(&dev->phy_mutex); return ret; } static int mcs7830_write_phy(struct usbnet *dev, u8 index, u16 val) { int ret; int i; __le16 le_val; u8 cmd[2] = { HIF_REG_PHY_CMD1_WRITE | HIF_REG_PHY_CMD1_PHYADDR, HIF_REG_PHY_CMD2_PEND_FLAG_BIT | (index & 0x1F), }; mutex_lock(&dev->phy_mutex); /* write the new register contents */ le_val = cpu_to_le16(val); ret = mcs7830_set_reg(dev, HIF_REG_PHY_DATA, 2, &le_val); if (ret < 0) goto out; /* write the MII command */ ret = mcs7830_set_reg(dev, HIF_REG_PHY_CMD1, 2, cmd); if (ret < 0) goto out; /* wait for the command to be accepted by the PHY */ for (i = 0; i < 10; i++) { ret = mcs7830_get_reg(dev, HIF_REG_PHY_CMD1, 2, cmd); if ((ret < 0) || (cmd[1] & HIF_REG_PHY_CMD2_READY_FLAG_BIT)) break; ret = -EIO; msleep(1); } if (ret < 0) goto out; ret = 0; dev_dbg(&dev->udev->dev, "write PHY reg %02x: %04x (%d tries)\n", index, val, i); out: mutex_unlock(&dev->phy_mutex); return ret; } /* * This algorithm comes from the original mcs7830 version 1.4 driver, * not sure if it is needed. */ static int mcs7830_set_autoneg(struct usbnet *dev, int ptrUserPhyMode) { int ret; /* Enable all media types */ ret = mcs7830_write_phy(dev, MII_ADVERTISE, MCS7830_MII_ADVERTISE); /* First reset BMCR */ if (!ret) ret = mcs7830_write_phy(dev, MII_BMCR, 0x0000); /* Enable Auto Neg */ if (!ret) ret = mcs7830_write_phy(dev, MII_BMCR, BMCR_ANENABLE); /* Restart Auto Neg (Keep the Enable Auto Neg Bit Set) */ if (!ret) ret = mcs7830_write_phy(dev, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART ); return ret; } /* * if we can read register 22, the chip revision is C or higher */ static int mcs7830_get_rev(struct usbnet *dev) { u8 dummy[2]; int ret; ret = mcs7830_get_reg(dev, HIF_REG_FRAME_DROP_COUNTER, 2, dummy); if (ret > 0) return 2; /* Rev C or later */ return 1; /* earlier revision */ } /* * On rev. C we need to set the pause threshold */ static void mcs7830_rev_C_fixup(struct usbnet *dev) { u8 pause_threshold = HIF_REG_PAUSE_THRESHOLD_DEFAULT; int retry; for (retry = 0; retry < 2; retry++) { if (mcs7830_get_rev(dev) == 2) { dev_info(&dev->udev->dev, "applying rev.C fixup\n"); mcs7830_set_reg(dev, HIF_REG_PAUSE_THRESHOLD, 1, &pause_threshold); } msleep(1); } } static int mcs7830_mdio_read(struct net_device *netdev, int phy_id, int location) { struct usbnet *dev = netdev_priv(netdev); return mcs7830_read_phy(dev, location); } static void mcs7830_mdio_write(struct net_device *netdev, int phy_id, int location, int val) { struct usbnet *dev = netdev_priv(netdev); mcs7830_write_phy(dev, location, val); } static int mcs7830_ioctl(struct net_device *net, struct ifreq *rq, int cmd) { struct usbnet *dev = netdev_priv(net); return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); } static inline struct mcs7830_data *mcs7830_get_data(struct usbnet *dev) { return (struct mcs7830_data *)&dev->data; } static void mcs7830_hif_update_multicast_hash(struct usbnet *dev) { struct mcs7830_data *data = mcs7830_get_data(dev); mcs7830_set_reg_async(dev, HIF_REG_MULTICAST_HASH, sizeof data->multi_filter, data->multi_filter); } static void mcs7830_hif_update_config(struct usbnet *dev) { /* implementation specific to data->config (argument needs to be heap-based anyway - USB DMA!) */ struct mcs7830_data *data = mcs7830_get_data(dev); mcs7830_set_reg_async(dev, HIF_REG_CONFIG, 1, &data->config); } static void mcs7830_data_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct mcs7830_data *data = mcs7830_get_data(dev); memset(data->multi_filter, 0, sizeof data->multi_filter); data->config = HIF_REG_CONFIG_TXENABLE; /* this should not be needed, but it doesn't work otherwise */ data->config |= HIF_REG_CONFIG_ALLMULTICAST; if (net->flags & IFF_PROMISC) { data->config |= HIF_REG_CONFIG_PROMISCUOUS; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > MCS7830_MAX_MCAST) { data->config |= HIF_REG_CONFIG_ALLMULTICAST; } else if (netdev_mc_empty(net)) { /* just broadcast and directed */ } else { /* We use the 20 byte dev->data * for our 8 byte filter buffer * to avoid allocating memory that * is tricky to free later */ struct netdev_hw_addr *ha; u32 crc_bits; /* Build the multicast hash filter. */ netdev_for_each_mc_addr(ha, net) { crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26; data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7); } } } static int mcs7830_apply_base_config(struct usbnet *dev) { int ret; /* re-configure known MAC (suspend case etc.) */ ret = mcs7830_hif_set_mac_address(dev, dev->net->dev_addr); if (ret) { dev_info(&dev->udev->dev, "Cannot set MAC address\n"); goto out; } /* Set up PHY */ ret = mcs7830_set_autoneg(dev, 0); if (ret) { dev_info(&dev->udev->dev, "Cannot set autoneg\n"); goto out; } mcs7830_hif_update_multicast_hash(dev); mcs7830_hif_update_config(dev); mcs7830_rev_C_fixup(dev); ret = 0; out: return ret; } /* credits go to asix_set_multicast */ static void mcs7830_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); mcs7830_data_set_multicast(net); mcs7830_hif_update_multicast_hash(dev); mcs7830_hif_update_config(dev); } static int mcs7830_get_regs_len(struct net_device *net) { struct usbnet *dev = netdev_priv(net); switch (mcs7830_get_rev(dev)) { case 1: return 21; case 2: return 32; } return 0; } static void mcs7830_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *drvinfo) { usbnet_get_drvinfo(net, drvinfo); } static void mcs7830_get_regs(struct net_device *net, struct ethtool_regs *regs, void *data) { struct usbnet *dev = netdev_priv(net); regs->version = mcs7830_get_rev(dev); mcs7830_get_reg(dev, 0, regs->len, data); } static const struct ethtool_ops mcs7830_ethtool_ops = { .get_drvinfo = mcs7830_get_drvinfo, .get_regs_len = mcs7830_get_regs_len, .get_regs = mcs7830_get_regs, /* common usbnet calls */ .get_link = usbnet_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static const struct net_device_ops mcs7830_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = mcs7830_ioctl, .ndo_set_rx_mode = mcs7830_set_multicast, .ndo_set_mac_address = mcs7830_set_mac_address, }; static int mcs7830_bind(struct usbnet *dev, struct usb_interface *udev) { struct net_device *net = dev->net; u8 addr[ETH_ALEN]; int ret; int retry; /* Initial startup: Gather MAC address setting from EEPROM */ ret = -EINVAL; for (retry = 0; retry < 5 && ret; retry++) ret = mcs7830_hif_get_mac_address(dev, addr); if (ret) { dev_warn(&dev->udev->dev, "Cannot read MAC address\n"); goto out; } eth_hw_addr_set(net, addr); mcs7830_data_set_multicast(net); ret = mcs7830_apply_base_config(dev); if (ret) goto out; net->ethtool_ops = &mcs7830_ethtool_ops; net->netdev_ops = &mcs7830_netdev_ops; /* reserve space for the status byte on rx */ dev->rx_urb_size = ETH_FRAME_LEN + 1; dev->mii.mdio_read = mcs7830_mdio_read; dev->mii.mdio_write = mcs7830_mdio_write; dev->mii.dev = net; dev->mii.phy_id_mask = 0x3f; dev->mii.reg_num_mask = 0x1f; dev->mii.phy_id = *((u8 *) net->dev_addr + 1); ret = usbnet_get_endpoints(dev, udev); out: return ret; } /* The chip always appends a status byte that we need to strip */ static int mcs7830_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { u8 status; /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) { dev_err(&dev->udev->dev, "unexpected tiny rx frame\n"); return 0; } skb_trim(skb, skb->len - 1); status = skb->data[skb->len]; if (status != MCS7830_RX_FRAME_CORRECT) { dev_dbg(&dev->udev->dev, "rx fixup status %x\n", status); /* hmm, perhaps usbnet.c already sees a globally visible frame error and increments rx_errors on its own already? */ dev->net->stats.rx_errors++; if (status & (MCS7830_RX_SHORT_FRAME |MCS7830_RX_LENGTH_ERROR |MCS7830_RX_LARGE_FRAME)) dev->net->stats.rx_length_errors++; if (status & MCS7830_RX_ALIGNMENT_ERROR) dev->net->stats.rx_frame_errors++; if (status & MCS7830_RX_CRC_ERROR) dev->net->stats.rx_crc_errors++; } return skb->len > 0; } static void mcs7830_status(struct usbnet *dev, struct urb *urb) { u8 *buf = urb->transfer_buffer; bool link, link_changed; if (urb->actual_length < 16) return; link = !(buf[1] == 0x20); link_changed = netif_carrier_ok(dev->net) != link; if (link_changed) { usbnet_link_change(dev, link, 0); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } static const struct driver_info moschip_info = { .description = "MOSCHIP 7830/7832/7730 usb-NET adapter", .bind = mcs7830_bind, .rx_fixup = mcs7830_rx_fixup, .flags = FLAG_ETHER | FLAG_LINK_INTR, .status = mcs7830_status, .in = 1, .out = 2, }; static const struct driver_info sitecom_info = { .description = "Sitecom LN-30 usb-NET adapter", .bind = mcs7830_bind, .rx_fixup = mcs7830_rx_fixup, .flags = FLAG_ETHER | FLAG_LINK_INTR, .status = mcs7830_status, .in = 1, .out = 2, }; static const struct usb_device_id products[] = { { USB_DEVICE(MCS7830_VENDOR_ID, MCS7832_PRODUCT_ID), .driver_info = (unsigned long) &moschip_info, }, { USB_DEVICE(MCS7830_VENDOR_ID, MCS7830_PRODUCT_ID), .driver_info = (unsigned long) &moschip_info, }, { USB_DEVICE(MCS7830_VENDOR_ID, MCS7730_PRODUCT_ID), .driver_info = (unsigned long) &moschip_info, }, { USB_DEVICE(SITECOM_VENDOR_ID, LN_030_PRODUCT_ID), .driver_info = (unsigned long) &sitecom_info, }, {}, }; MODULE_DEVICE_TABLE(usb, products); static int mcs7830_reset_resume (struct usb_interface *intf) { /* YES, this function is successful enough that ethtool -d does show same output pre-/post-suspend */ struct usbnet *dev = usb_get_intfdata(intf); mcs7830_apply_base_config(dev); usbnet_resume(intf); return 0; } static struct usb_driver mcs7830_driver = { .name = driver_name, .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = mcs7830_reset_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(mcs7830_driver); MODULE_DESCRIPTION("USB to network adapter MCS7830)"); MODULE_LICENSE("GPL");
2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 /* * Cryptographic API. * * AES Cipher Algorithm. * * Based on Brian Gladman's code. * * Linux developers: * Alexander Kjeldaas <astor@fast.no> * Herbert Valerio Riedel <hvr@hvrlab.org> * Kyle McMartin <kyle@debian.org> * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API). * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * --------------------------------------------------------------------------- * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. * All rights reserved. * * LICENSE TERMS * * The free distribution and use of this software in both source and binary * form is allowed (with or without changes) provided that: * * 1. distributions of this source code include the above copyright * notice, this list of conditions and the following disclaimer; * * 2. distributions in binary form include the above copyright * notice, this list of conditions and the following disclaimer * in the documentation and/or other associated materials; * * 3. the copyright holder's name is not used to endorse products * built using this software without specific written permission. * * ALTERNATIVELY, provided that this notice is retained in full, this product * may be distributed under the terms of the GNU General Public License (GPL), * in which case the provisions of the GPL apply INSTEAD OF those given above. * * DISCLAIMER * * This software is provided 'as is' with no explicit or implied warranties * in respect of its properties, including, but not limited to, correctness * and/or fitness for purpose. * --------------------------------------------------------------------------- */ #include <crypto/aes.h> #include <crypto/algapi.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> #include <asm/byteorder.h> #include <asm/unaligned.h> static inline u8 byte(const u32 x, const unsigned n) { return x >> (n << 3); } /* cacheline-aligned to facilitate prefetching into cache */ __visible const u32 crypto_ft_tab[4][256] ____cacheline_aligned = { { 0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, 0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, 0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, 0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, 0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, 0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, 0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, 0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, 0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, 0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, 0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, }, { 0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, 0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, 0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, 0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, 0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, 0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, 0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, 0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, 0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, 0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, 0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, }, { 0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, 0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, 0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, 0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, 0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, 0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, 0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, 0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, 0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, 0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, 0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, }, { 0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, 0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, 0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, 0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, 0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, 0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, 0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, 0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, 0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, 0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, 0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, } }; static const u32 crypto_fl_tab[4][256] ____cacheline_aligned = { { 0x00000063, 0x0000007c, 0x00000077, 0x0000007b, 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5, 0x00000030, 0x00000001, 0x00000067, 0x0000002b, 0x000000fe, 0x000000d7, 0x000000ab, 0x00000076, 0x000000ca, 0x00000082, 0x000000c9, 0x0000007d, 0x000000fa, 0x00000059, 0x00000047, 0x000000f0, 0x000000ad, 0x000000d4, 0x000000a2, 0x000000af, 0x0000009c, 0x000000a4, 0x00000072, 0x000000c0, 0x000000b7, 0x000000fd, 0x00000093, 0x00000026, 0x00000036, 0x0000003f, 0x000000f7, 0x000000cc, 0x00000034, 0x000000a5, 0x000000e5, 0x000000f1, 0x00000071, 0x000000d8, 0x00000031, 0x00000015, 0x00000004, 0x000000c7, 0x00000023, 0x000000c3, 0x00000018, 0x00000096, 0x00000005, 0x0000009a, 0x00000007, 0x00000012, 0x00000080, 0x000000e2, 0x000000eb, 0x00000027, 0x000000b2, 0x00000075, 0x00000009, 0x00000083, 0x0000002c, 0x0000001a, 0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0, 0x00000052, 0x0000003b, 0x000000d6, 0x000000b3, 0x00000029, 0x000000e3, 0x0000002f, 0x00000084, 0x00000053, 0x000000d1, 0x00000000, 0x000000ed, 0x00000020, 0x000000fc, 0x000000b1, 0x0000005b, 0x0000006a, 0x000000cb, 0x000000be, 0x00000039, 0x0000004a, 0x0000004c, 0x00000058, 0x000000cf, 0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb, 0x00000043, 0x0000004d, 0x00000033, 0x00000085, 0x00000045, 0x000000f9, 0x00000002, 0x0000007f, 0x00000050, 0x0000003c, 0x0000009f, 0x000000a8, 0x00000051, 0x000000a3, 0x00000040, 0x0000008f, 0x00000092, 0x0000009d, 0x00000038, 0x000000f5, 0x000000bc, 0x000000b6, 0x000000da, 0x00000021, 0x00000010, 0x000000ff, 0x000000f3, 0x000000d2, 0x000000cd, 0x0000000c, 0x00000013, 0x000000ec, 0x0000005f, 0x00000097, 0x00000044, 0x00000017, 0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d, 0x00000064, 0x0000005d, 0x00000019, 0x00000073, 0x00000060, 0x00000081, 0x0000004f, 0x000000dc, 0x00000022, 0x0000002a, 0x00000090, 0x00000088, 0x00000046, 0x000000ee, 0x000000b8, 0x00000014, 0x000000de, 0x0000005e, 0x0000000b, 0x000000db, 0x000000e0, 0x00000032, 0x0000003a, 0x0000000a, 0x00000049, 0x00000006, 0x00000024, 0x0000005c, 0x000000c2, 0x000000d3, 0x000000ac, 0x00000062, 0x00000091, 0x00000095, 0x000000e4, 0x00000079, 0x000000e7, 0x000000c8, 0x00000037, 0x0000006d, 0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9, 0x0000006c, 0x00000056, 0x000000f4, 0x000000ea, 0x00000065, 0x0000007a, 0x000000ae, 0x00000008, 0x000000ba, 0x00000078, 0x00000025, 0x0000002e, 0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6, 0x000000e8, 0x000000dd, 0x00000074, 0x0000001f, 0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a, 0x00000070, 0x0000003e, 0x000000b5, 0x00000066, 0x00000048, 0x00000003, 0x000000f6, 0x0000000e, 0x00000061, 0x00000035, 0x00000057, 0x000000b9, 0x00000086, 0x000000c1, 0x0000001d, 0x0000009e, 0x000000e1, 0x000000f8, 0x00000098, 0x00000011, 0x00000069, 0x000000d9, 0x0000008e, 0x00000094, 0x0000009b, 0x0000001e, 0x00000087, 0x000000e9, 0x000000ce, 0x00000055, 0x00000028, 0x000000df, 0x0000008c, 0x000000a1, 0x00000089, 0x0000000d, 0x000000bf, 0x000000e6, 0x00000042, 0x00000068, 0x00000041, 0x00000099, 0x0000002d, 0x0000000f, 0x000000b0, 0x00000054, 0x000000bb, 0x00000016, }, { 0x00006300, 0x00007c00, 0x00007700, 0x00007b00, 0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500, 0x00003000, 0x00000100, 0x00006700, 0x00002b00, 0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600, 0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00, 0x0000fa00, 0x00005900, 0x00004700, 0x0000f000, 0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00, 0x00009c00, 0x0000a400, 0x00007200, 0x0000c000, 0x0000b700, 0x0000fd00, 0x00009300, 0x00002600, 0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00, 0x00003400, 0x0000a500, 0x0000e500, 0x0000f100, 0x00007100, 0x0000d800, 0x00003100, 0x00001500, 0x00000400, 0x0000c700, 0x00002300, 0x0000c300, 0x00001800, 0x00009600, 0x00000500, 0x00009a00, 0x00000700, 0x00001200, 0x00008000, 0x0000e200, 0x0000eb00, 0x00002700, 0x0000b200, 0x00007500, 0x00000900, 0x00008300, 0x00002c00, 0x00001a00, 0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000, 0x00005200, 0x00003b00, 0x0000d600, 0x0000b300, 0x00002900, 0x0000e300, 0x00002f00, 0x00008400, 0x00005300, 0x0000d100, 0x00000000, 0x0000ed00, 0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00, 0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900, 0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00, 0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00, 0x00004300, 0x00004d00, 0x00003300, 0x00008500, 0x00004500, 0x0000f900, 0x00000200, 0x00007f00, 0x00005000, 0x00003c00, 0x00009f00, 0x0000a800, 0x00005100, 0x0000a300, 0x00004000, 0x00008f00, 0x00009200, 0x00009d00, 0x00003800, 0x0000f500, 0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100, 0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200, 0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00, 0x00005f00, 0x00009700, 0x00004400, 0x00001700, 0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00, 0x00006400, 0x00005d00, 0x00001900, 0x00007300, 0x00006000, 0x00008100, 0x00004f00, 0x0000dc00, 0x00002200, 0x00002a00, 0x00009000, 0x00008800, 0x00004600, 0x0000ee00, 0x0000b800, 0x00001400, 0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00, 0x0000e000, 0x00003200, 0x00003a00, 0x00000a00, 0x00004900, 0x00000600, 0x00002400, 0x00005c00, 0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200, 0x00009100, 0x00009500, 0x0000e400, 0x00007900, 0x0000e700, 0x0000c800, 0x00003700, 0x00006d00, 0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900, 0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00, 0x00006500, 0x00007a00, 0x0000ae00, 0x00000800, 0x0000ba00, 0x00007800, 0x00002500, 0x00002e00, 0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600, 0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00, 0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00, 0x00007000, 0x00003e00, 0x0000b500, 0x00006600, 0x00004800, 0x00000300, 0x0000f600, 0x00000e00, 0x00006100, 0x00003500, 0x00005700, 0x0000b900, 0x00008600, 0x0000c100, 0x00001d00, 0x00009e00, 0x0000e100, 0x0000f800, 0x00009800, 0x00001100, 0x00006900, 0x0000d900, 0x00008e00, 0x00009400, 0x00009b00, 0x00001e00, 0x00008700, 0x0000e900, 0x0000ce00, 0x00005500, 0x00002800, 0x0000df00, 0x00008c00, 0x0000a100, 0x00008900, 0x00000d00, 0x0000bf00, 0x0000e600, 0x00004200, 0x00006800, 0x00004100, 0x00009900, 0x00002d00, 0x00000f00, 0x0000b000, 0x00005400, 0x0000bb00, 0x00001600, }, { 0x00630000, 0x007c0000, 0x00770000, 0x007b0000, 0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000, 0x00300000, 0x00010000, 0x00670000, 0x002b0000, 0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000, 0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000, 0x00fa0000, 0x00590000, 0x00470000, 0x00f00000, 0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000, 0x009c0000, 0x00a40000, 0x00720000, 0x00c00000, 0x00b70000, 0x00fd0000, 0x00930000, 0x00260000, 0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000, 0x00340000, 0x00a50000, 0x00e50000, 0x00f10000, 0x00710000, 0x00d80000, 0x00310000, 0x00150000, 0x00040000, 0x00c70000, 0x00230000, 0x00c30000, 0x00180000, 0x00960000, 0x00050000, 0x009a0000, 0x00070000, 0x00120000, 0x00800000, 0x00e20000, 0x00eb0000, 0x00270000, 0x00b20000, 0x00750000, 0x00090000, 0x00830000, 0x002c0000, 0x001a0000, 0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000, 0x00520000, 0x003b0000, 0x00d60000, 0x00b30000, 0x00290000, 0x00e30000, 0x002f0000, 0x00840000, 0x00530000, 0x00d10000, 0x00000000, 0x00ed0000, 0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000, 0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000, 0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000, 0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000, 0x00430000, 0x004d0000, 0x00330000, 0x00850000, 0x00450000, 0x00f90000, 0x00020000, 0x007f0000, 0x00500000, 0x003c0000, 0x009f0000, 0x00a80000, 0x00510000, 0x00a30000, 0x00400000, 0x008f0000, 0x00920000, 0x009d0000, 0x00380000, 0x00f50000, 0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000, 0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000, 0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000, 0x005f0000, 0x00970000, 0x00440000, 0x00170000, 0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000, 0x00640000, 0x005d0000, 0x00190000, 0x00730000, 0x00600000, 0x00810000, 0x004f0000, 0x00dc0000, 0x00220000, 0x002a0000, 0x00900000, 0x00880000, 0x00460000, 0x00ee0000, 0x00b80000, 0x00140000, 0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000, 0x00e00000, 0x00320000, 0x003a0000, 0x000a0000, 0x00490000, 0x00060000, 0x00240000, 0x005c0000, 0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000, 0x00910000, 0x00950000, 0x00e40000, 0x00790000, 0x00e70000, 0x00c80000, 0x00370000, 0x006d0000, 0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000, 0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000, 0x00650000, 0x007a0000, 0x00ae0000, 0x00080000, 0x00ba0000, 0x00780000, 0x00250000, 0x002e0000, 0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000, 0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000, 0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000, 0x00700000, 0x003e0000, 0x00b50000, 0x00660000, 0x00480000, 0x00030000, 0x00f60000, 0x000e0000, 0x00610000, 0x00350000, 0x00570000, 0x00b90000, 0x00860000, 0x00c10000, 0x001d0000, 0x009e0000, 0x00e10000, 0x00f80000, 0x00980000, 0x00110000, 0x00690000, 0x00d90000, 0x008e0000, 0x00940000, 0x009b0000, 0x001e0000, 0x00870000, 0x00e90000, 0x00ce0000, 0x00550000, 0x00280000, 0x00df0000, 0x008c0000, 0x00a10000, 0x00890000, 0x000d0000, 0x00bf0000, 0x00e60000, 0x00420000, 0x00680000, 0x00410000, 0x00990000, 0x002d0000, 0x000f0000, 0x00b00000, 0x00540000, 0x00bb0000, 0x00160000, }, { 0x63000000, 0x7c000000, 0x77000000, 0x7b000000, 0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000, 0x30000000, 0x01000000, 0x67000000, 0x2b000000, 0xfe000000, 0xd7000000, 0xab000000, 0x76000000, 0xca000000, 0x82000000, 0xc9000000, 0x7d000000, 0xfa000000, 0x59000000, 0x47000000, 0xf0000000, 0xad000000, 0xd4000000, 0xa2000000, 0xaf000000, 0x9c000000, 0xa4000000, 0x72000000, 0xc0000000, 0xb7000000, 0xfd000000, 0x93000000, 0x26000000, 0x36000000, 0x3f000000, 0xf7000000, 0xcc000000, 0x34000000, 0xa5000000, 0xe5000000, 0xf1000000, 0x71000000, 0xd8000000, 0x31000000, 0x15000000, 0x04000000, 0xc7000000, 0x23000000, 0xc3000000, 0x18000000, 0x96000000, 0x05000000, 0x9a000000, 0x07000000, 0x12000000, 0x80000000, 0xe2000000, 0xeb000000, 0x27000000, 0xb2000000, 0x75000000, 0x09000000, 0x83000000, 0x2c000000, 0x1a000000, 0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000, 0x52000000, 0x3b000000, 0xd6000000, 0xb3000000, 0x29000000, 0xe3000000, 0x2f000000, 0x84000000, 0x53000000, 0xd1000000, 0x00000000, 0xed000000, 0x20000000, 0xfc000000, 0xb1000000, 0x5b000000, 0x6a000000, 0xcb000000, 0xbe000000, 0x39000000, 0x4a000000, 0x4c000000, 0x58000000, 0xcf000000, 0xd0000000, 0xef000000, 0xaa000000, 0xfb000000, 0x43000000, 0x4d000000, 0x33000000, 0x85000000, 0x45000000, 0xf9000000, 0x02000000, 0x7f000000, 0x50000000, 0x3c000000, 0x9f000000, 0xa8000000, 0x51000000, 0xa3000000, 0x40000000, 0x8f000000, 0x92000000, 0x9d000000, 0x38000000, 0xf5000000, 0xbc000000, 0xb6000000, 0xda000000, 0x21000000, 0x10000000, 0xff000000, 0xf3000000, 0xd2000000, 0xcd000000, 0x0c000000, 0x13000000, 0xec000000, 0x5f000000, 0x97000000, 0x44000000, 0x17000000, 0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000, 0x64000000, 0x5d000000, 0x19000000, 0x73000000, 0x60000000, 0x81000000, 0x4f000000, 0xdc000000, 0x22000000, 0x2a000000, 0x90000000, 0x88000000, 0x46000000, 0xee000000, 0xb8000000, 0x14000000, 0xde000000, 0x5e000000, 0x0b000000, 0xdb000000, 0xe0000000, 0x32000000, 0x3a000000, 0x0a000000, 0x49000000, 0x06000000, 0x24000000, 0x5c000000, 0xc2000000, 0xd3000000, 0xac000000, 0x62000000, 0x91000000, 0x95000000, 0xe4000000, 0x79000000, 0xe7000000, 0xc8000000, 0x37000000, 0x6d000000, 0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000, 0x6c000000, 0x56000000, 0xf4000000, 0xea000000, 0x65000000, 0x7a000000, 0xae000000, 0x08000000, 0xba000000, 0x78000000, 0x25000000, 0x2e000000, 0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000, 0xe8000000, 0xdd000000, 0x74000000, 0x1f000000, 0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000, 0x70000000, 0x3e000000, 0xb5000000, 0x66000000, 0x48000000, 0x03000000, 0xf6000000, 0x0e000000, 0x61000000, 0x35000000, 0x57000000, 0xb9000000, 0x86000000, 0xc1000000, 0x1d000000, 0x9e000000, 0xe1000000, 0xf8000000, 0x98000000, 0x11000000, 0x69000000, 0xd9000000, 0x8e000000, 0x94000000, 0x9b000000, 0x1e000000, 0x87000000, 0xe9000000, 0xce000000, 0x55000000, 0x28000000, 0xdf000000, 0x8c000000, 0xa1000000, 0x89000000, 0x0d000000, 0xbf000000, 0xe6000000, 0x42000000, 0x68000000, 0x41000000, 0x99000000, 0x2d000000, 0x0f000000, 0xb0000000, 0x54000000, 0xbb000000, 0x16000000, } }; __visible const u32 crypto_it_tab[4][256] ____cacheline_aligned = { { 0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, 0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, 0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, 0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, 0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, 0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, 0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, 0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, 0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, 0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, 0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, }, { 0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, 0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, 0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, 0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, 0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, 0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, 0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, 0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, 0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, 0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, 0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, }, { 0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, 0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, 0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, 0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, 0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, 0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, 0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, 0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, 0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, 0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, 0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, }, { 0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, 0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, 0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, 0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, 0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12, 0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, 0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, 0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, 0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, 0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, 0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, } }; static const u32 crypto_il_tab[4][256] ____cacheline_aligned = { { 0x00000052, 0x00000009, 0x0000006a, 0x000000d5, 0x00000030, 0x00000036, 0x000000a5, 0x00000038, 0x000000bf, 0x00000040, 0x000000a3, 0x0000009e, 0x00000081, 0x000000f3, 0x000000d7, 0x000000fb, 0x0000007c, 0x000000e3, 0x00000039, 0x00000082, 0x0000009b, 0x0000002f, 0x000000ff, 0x00000087, 0x00000034, 0x0000008e, 0x00000043, 0x00000044, 0x000000c4, 0x000000de, 0x000000e9, 0x000000cb, 0x00000054, 0x0000007b, 0x00000094, 0x00000032, 0x000000a6, 0x000000c2, 0x00000023, 0x0000003d, 0x000000ee, 0x0000004c, 0x00000095, 0x0000000b, 0x00000042, 0x000000fa, 0x000000c3, 0x0000004e, 0x00000008, 0x0000002e, 0x000000a1, 0x00000066, 0x00000028, 0x000000d9, 0x00000024, 0x000000b2, 0x00000076, 0x0000005b, 0x000000a2, 0x00000049, 0x0000006d, 0x0000008b, 0x000000d1, 0x00000025, 0x00000072, 0x000000f8, 0x000000f6, 0x00000064, 0x00000086, 0x00000068, 0x00000098, 0x00000016, 0x000000d4, 0x000000a4, 0x0000005c, 0x000000cc, 0x0000005d, 0x00000065, 0x000000b6, 0x00000092, 0x0000006c, 0x00000070, 0x00000048, 0x00000050, 0x000000fd, 0x000000ed, 0x000000b9, 0x000000da, 0x0000005e, 0x00000015, 0x00000046, 0x00000057, 0x000000a7, 0x0000008d, 0x0000009d, 0x00000084, 0x00000090, 0x000000d8, 0x000000ab, 0x00000000, 0x0000008c, 0x000000bc, 0x000000d3, 0x0000000a, 0x000000f7, 0x000000e4, 0x00000058, 0x00000005, 0x000000b8, 0x000000b3, 0x00000045, 0x00000006, 0x000000d0, 0x0000002c, 0x0000001e, 0x0000008f, 0x000000ca, 0x0000003f, 0x0000000f, 0x00000002, 0x000000c1, 0x000000af, 0x000000bd, 0x00000003, 0x00000001, 0x00000013, 0x0000008a, 0x0000006b, 0x0000003a, 0x00000091, 0x00000011, 0x00000041, 0x0000004f, 0x00000067, 0x000000dc, 0x000000ea, 0x00000097, 0x000000f2, 0x000000cf, 0x000000ce, 0x000000f0, 0x000000b4, 0x000000e6, 0x00000073, 0x00000096, 0x000000ac, 0x00000074, 0x00000022, 0x000000e7, 0x000000ad, 0x00000035, 0x00000085, 0x000000e2, 0x000000f9, 0x00000037, 0x000000e8, 0x0000001c, 0x00000075, 0x000000df, 0x0000006e, 0x00000047, 0x000000f1, 0x0000001a, 0x00000071, 0x0000001d, 0x00000029, 0x000000c5, 0x00000089, 0x0000006f, 0x000000b7, 0x00000062, 0x0000000e, 0x000000aa, 0x00000018, 0x000000be, 0x0000001b, 0x000000fc, 0x00000056, 0x0000003e, 0x0000004b, 0x000000c6, 0x000000d2, 0x00000079, 0x00000020, 0x0000009a, 0x000000db, 0x000000c0, 0x000000fe, 0x00000078, 0x000000cd, 0x0000005a, 0x000000f4, 0x0000001f, 0x000000dd, 0x000000a8, 0x00000033, 0x00000088, 0x00000007, 0x000000c7, 0x00000031, 0x000000b1, 0x00000012, 0x00000010, 0x00000059, 0x00000027, 0x00000080, 0x000000ec, 0x0000005f, 0x00000060, 0x00000051, 0x0000007f, 0x000000a9, 0x00000019, 0x000000b5, 0x0000004a, 0x0000000d, 0x0000002d, 0x000000e5, 0x0000007a, 0x0000009f, 0x00000093, 0x000000c9, 0x0000009c, 0x000000ef, 0x000000a0, 0x000000e0, 0x0000003b, 0x0000004d, 0x000000ae, 0x0000002a, 0x000000f5, 0x000000b0, 0x000000c8, 0x000000eb, 0x000000bb, 0x0000003c, 0x00000083, 0x00000053, 0x00000099, 0x00000061, 0x00000017, 0x0000002b, 0x00000004, 0x0000007e, 0x000000ba, 0x00000077, 0x000000d6, 0x00000026, 0x000000e1, 0x00000069, 0x00000014, 0x00000063, 0x00000055, 0x00000021, 0x0000000c, 0x0000007d, }, { 0x00005200, 0x00000900, 0x00006a00, 0x0000d500, 0x00003000, 0x00003600, 0x0000a500, 0x00003800, 0x0000bf00, 0x00004000, 0x0000a300, 0x00009e00, 0x00008100, 0x0000f300, 0x0000d700, 0x0000fb00, 0x00007c00, 0x0000e300, 0x00003900, 0x00008200, 0x00009b00, 0x00002f00, 0x0000ff00, 0x00008700, 0x00003400, 0x00008e00, 0x00004300, 0x00004400, 0x0000c400, 0x0000de00, 0x0000e900, 0x0000cb00, 0x00005400, 0x00007b00, 0x00009400, 0x00003200, 0x0000a600, 0x0000c200, 0x00002300, 0x00003d00, 0x0000ee00, 0x00004c00, 0x00009500, 0x00000b00, 0x00004200, 0x0000fa00, 0x0000c300, 0x00004e00, 0x00000800, 0x00002e00, 0x0000a100, 0x00006600, 0x00002800, 0x0000d900, 0x00002400, 0x0000b200, 0x00007600, 0x00005b00, 0x0000a200, 0x00004900, 0x00006d00, 0x00008b00, 0x0000d100, 0x00002500, 0x00007200, 0x0000f800, 0x0000f600, 0x00006400, 0x00008600, 0x00006800, 0x00009800, 0x00001600, 0x0000d400, 0x0000a400, 0x00005c00, 0x0000cc00, 0x00005d00, 0x00006500, 0x0000b600, 0x00009200, 0x00006c00, 0x00007000, 0x00004800, 0x00005000, 0x0000fd00, 0x0000ed00, 0x0000b900, 0x0000da00, 0x00005e00, 0x00001500, 0x00004600, 0x00005700, 0x0000a700, 0x00008d00, 0x00009d00, 0x00008400, 0x00009000, 0x0000d800, 0x0000ab00, 0x00000000, 0x00008c00, 0x0000bc00, 0x0000d300, 0x00000a00, 0x0000f700, 0x0000e400, 0x00005800, 0x00000500, 0x0000b800, 0x0000b300, 0x00004500, 0x00000600, 0x0000d000, 0x00002c00, 0x00001e00, 0x00008f00, 0x0000ca00, 0x00003f00, 0x00000f00, 0x00000200, 0x0000c100, 0x0000af00, 0x0000bd00, 0x00000300, 0x00000100, 0x00001300, 0x00008a00, 0x00006b00, 0x00003a00, 0x00009100, 0x00001100, 0x00004100, 0x00004f00, 0x00006700, 0x0000dc00, 0x0000ea00, 0x00009700, 0x0000f200, 0x0000cf00, 0x0000ce00, 0x0000f000, 0x0000b400, 0x0000e600, 0x00007300, 0x00009600, 0x0000ac00, 0x00007400, 0x00002200, 0x0000e700, 0x0000ad00, 0x00003500, 0x00008500, 0x0000e200, 0x0000f900, 0x00003700, 0x0000e800, 0x00001c00, 0x00007500, 0x0000df00, 0x00006e00, 0x00004700, 0x0000f100, 0x00001a00, 0x00007100, 0x00001d00, 0x00002900, 0x0000c500, 0x00008900, 0x00006f00, 0x0000b700, 0x00006200, 0x00000e00, 0x0000aa00, 0x00001800, 0x0000be00, 0x00001b00, 0x0000fc00, 0x00005600, 0x00003e00, 0x00004b00, 0x0000c600, 0x0000d200, 0x00007900, 0x00002000, 0x00009a00, 0x0000db00, 0x0000c000, 0x0000fe00, 0x00007800, 0x0000cd00, 0x00005a00, 0x0000f400, 0x00001f00, 0x0000dd00, 0x0000a800, 0x00003300, 0x00008800, 0x00000700, 0x0000c700, 0x00003100, 0x0000b100, 0x00001200, 0x00001000, 0x00005900, 0x00002700, 0x00008000, 0x0000ec00, 0x00005f00, 0x00006000, 0x00005100, 0x00007f00, 0x0000a900, 0x00001900, 0x0000b500, 0x00004a00, 0x00000d00, 0x00002d00, 0x0000e500, 0x00007a00, 0x00009f00, 0x00009300, 0x0000c900, 0x00009c00, 0x0000ef00, 0x0000a000, 0x0000e000, 0x00003b00, 0x00004d00, 0x0000ae00, 0x00002a00, 0x0000f500, 0x0000b000, 0x0000c800, 0x0000eb00, 0x0000bb00, 0x00003c00, 0x00008300, 0x00005300, 0x00009900, 0x00006100, 0x00001700, 0x00002b00, 0x00000400, 0x00007e00, 0x0000ba00, 0x00007700, 0x0000d600, 0x00002600, 0x0000e100, 0x00006900, 0x00001400, 0x00006300, 0x00005500, 0x00002100, 0x00000c00, 0x00007d00, }, { 0x00520000, 0x00090000, 0x006a0000, 0x00d50000, 0x00300000, 0x00360000, 0x00a50000, 0x00380000, 0x00bf0000, 0x00400000, 0x00a30000, 0x009e0000, 0x00810000, 0x00f30000, 0x00d70000, 0x00fb0000, 0x007c0000, 0x00e30000, 0x00390000, 0x00820000, 0x009b0000, 0x002f0000, 0x00ff0000, 0x00870000, 0x00340000, 0x008e0000, 0x00430000, 0x00440000, 0x00c40000, 0x00de0000, 0x00e90000, 0x00cb0000, 0x00540000, 0x007b0000, 0x00940000, 0x00320000, 0x00a60000, 0x00c20000, 0x00230000, 0x003d0000, 0x00ee0000, 0x004c0000, 0x00950000, 0x000b0000, 0x00420000, 0x00fa0000, 0x00c30000, 0x004e0000, 0x00080000, 0x002e0000, 0x00a10000, 0x00660000, 0x00280000, 0x00d90000, 0x00240000, 0x00b20000, 0x00760000, 0x005b0000, 0x00a20000, 0x00490000, 0x006d0000, 0x008b0000, 0x00d10000, 0x00250000, 0x00720000, 0x00f80000, 0x00f60000, 0x00640000, 0x00860000, 0x00680000, 0x00980000, 0x00160000, 0x00d40000, 0x00a40000, 0x005c0000, 0x00cc0000, 0x005d0000, 0x00650000, 0x00b60000, 0x00920000, 0x006c0000, 0x00700000, 0x00480000, 0x00500000, 0x00fd0000, 0x00ed0000, 0x00b90000, 0x00da0000, 0x005e0000, 0x00150000, 0x00460000, 0x00570000, 0x00a70000, 0x008d0000, 0x009d0000, 0x00840000, 0x00900000, 0x00d80000, 0x00ab0000, 0x00000000, 0x008c0000, 0x00bc0000, 0x00d30000, 0x000a0000, 0x00f70000, 0x00e40000, 0x00580000, 0x00050000, 0x00b80000, 0x00b30000, 0x00450000, 0x00060000, 0x00d00000, 0x002c0000, 0x001e0000, 0x008f0000, 0x00ca0000, 0x003f0000, 0x000f0000, 0x00020000, 0x00c10000, 0x00af0000, 0x00bd0000, 0x00030000, 0x00010000, 0x00130000, 0x008a0000, 0x006b0000, 0x003a0000, 0x00910000, 0x00110000, 0x00410000, 0x004f0000, 0x00670000, 0x00dc0000, 0x00ea0000, 0x00970000, 0x00f20000, 0x00cf0000, 0x00ce0000, 0x00f00000, 0x00b40000, 0x00e60000, 0x00730000, 0x00960000, 0x00ac0000, 0x00740000, 0x00220000, 0x00e70000, 0x00ad0000, 0x00350000, 0x00850000, 0x00e20000, 0x00f90000, 0x00370000, 0x00e80000, 0x001c0000, 0x00750000, 0x00df0000, 0x006e0000, 0x00470000, 0x00f10000, 0x001a0000, 0x00710000, 0x001d0000, 0x00290000, 0x00c50000, 0x00890000, 0x006f0000, 0x00b70000, 0x00620000, 0x000e0000, 0x00aa0000, 0x00180000, 0x00be0000, 0x001b0000, 0x00fc0000, 0x00560000, 0x003e0000, 0x004b0000, 0x00c60000, 0x00d20000, 0x00790000, 0x00200000, 0x009a0000, 0x00db0000, 0x00c00000, 0x00fe0000, 0x00780000, 0x00cd0000, 0x005a0000, 0x00f40000, 0x001f0000, 0x00dd0000, 0x00a80000, 0x00330000, 0x00880000, 0x00070000, 0x00c70000, 0x00310000, 0x00b10000, 0x00120000, 0x00100000, 0x00590000, 0x00270000, 0x00800000, 0x00ec0000, 0x005f0000, 0x00600000, 0x00510000, 0x007f0000, 0x00a90000, 0x00190000, 0x00b50000, 0x004a0000, 0x000d0000, 0x002d0000, 0x00e50000, 0x007a0000, 0x009f0000, 0x00930000, 0x00c90000, 0x009c0000, 0x00ef0000, 0x00a00000, 0x00e00000, 0x003b0000, 0x004d0000, 0x00ae0000, 0x002a0000, 0x00f50000, 0x00b00000, 0x00c80000, 0x00eb0000, 0x00bb0000, 0x003c0000, 0x00830000, 0x00530000, 0x00990000, 0x00610000, 0x00170000, 0x002b0000, 0x00040000, 0x007e0000, 0x00ba0000, 0x00770000, 0x00d60000, 0x00260000, 0x00e10000, 0x00690000, 0x00140000, 0x00630000, 0x00550000, 0x00210000, 0x000c0000, 0x007d0000, }, { 0x52000000, 0x09000000, 0x6a000000, 0xd5000000, 0x30000000, 0x36000000, 0xa5000000, 0x38000000, 0xbf000000, 0x40000000, 0xa3000000, 0x9e000000, 0x81000000, 0xf3000000, 0xd7000000, 0xfb000000, 0x7c000000, 0xe3000000, 0x39000000, 0x82000000, 0x9b000000, 0x2f000000, 0xff000000, 0x87000000, 0x34000000, 0x8e000000, 0x43000000, 0x44000000, 0xc4000000, 0xde000000, 0xe9000000, 0xcb000000, 0x54000000, 0x7b000000, 0x94000000, 0x32000000, 0xa6000000, 0xc2000000, 0x23000000, 0x3d000000, 0xee000000, 0x4c000000, 0x95000000, 0x0b000000, 0x42000000, 0xfa000000, 0xc3000000, 0x4e000000, 0x08000000, 0x2e000000, 0xa1000000, 0x66000000, 0x28000000, 0xd9000000, 0x24000000, 0xb2000000, 0x76000000, 0x5b000000, 0xa2000000, 0x49000000, 0x6d000000, 0x8b000000, 0xd1000000, 0x25000000, 0x72000000, 0xf8000000, 0xf6000000, 0x64000000, 0x86000000, 0x68000000, 0x98000000, 0x16000000, 0xd4000000, 0xa4000000, 0x5c000000, 0xcc000000, 0x5d000000, 0x65000000, 0xb6000000, 0x92000000, 0x6c000000, 0x70000000, 0x48000000, 0x50000000, 0xfd000000, 0xed000000, 0xb9000000, 0xda000000, 0x5e000000, 0x15000000, 0x46000000, 0x57000000, 0xa7000000, 0x8d000000, 0x9d000000, 0x84000000, 0x90000000, 0xd8000000, 0xab000000, 0x00000000, 0x8c000000, 0xbc000000, 0xd3000000, 0x0a000000, 0xf7000000, 0xe4000000, 0x58000000, 0x05000000, 0xb8000000, 0xb3000000, 0x45000000, 0x06000000, 0xd0000000, 0x2c000000, 0x1e000000, 0x8f000000, 0xca000000, 0x3f000000, 0x0f000000, 0x02000000, 0xc1000000, 0xaf000000, 0xbd000000, 0x03000000, 0x01000000, 0x13000000, 0x8a000000, 0x6b000000, 0x3a000000, 0x91000000, 0x11000000, 0x41000000, 0x4f000000, 0x67000000, 0xdc000000, 0xea000000, 0x97000000, 0xf2000000, 0xcf000000, 0xce000000, 0xf0000000, 0xb4000000, 0xe6000000, 0x73000000, 0x96000000, 0xac000000, 0x74000000, 0x22000000, 0xe7000000, 0xad000000, 0x35000000, 0x85000000, 0xe2000000, 0xf9000000, 0x37000000, 0xe8000000, 0x1c000000, 0x75000000, 0xdf000000, 0x6e000000, 0x47000000, 0xf1000000, 0x1a000000, 0x71000000, 0x1d000000, 0x29000000, 0xc5000000, 0x89000000, 0x6f000000, 0xb7000000, 0x62000000, 0x0e000000, 0xaa000000, 0x18000000, 0xbe000000, 0x1b000000, 0xfc000000, 0x56000000, 0x3e000000, 0x4b000000, 0xc6000000, 0xd2000000, 0x79000000, 0x20000000, 0x9a000000, 0xdb000000, 0xc0000000, 0xfe000000, 0x78000000, 0xcd000000, 0x5a000000, 0xf4000000, 0x1f000000, 0xdd000000, 0xa8000000, 0x33000000, 0x88000000, 0x07000000, 0xc7000000, 0x31000000, 0xb1000000, 0x12000000, 0x10000000, 0x59000000, 0x27000000, 0x80000000, 0xec000000, 0x5f000000, 0x60000000, 0x51000000, 0x7f000000, 0xa9000000, 0x19000000, 0xb5000000, 0x4a000000, 0x0d000000, 0x2d000000, 0xe5000000, 0x7a000000, 0x9f000000, 0x93000000, 0xc9000000, 0x9c000000, 0xef000000, 0xa0000000, 0xe0000000, 0x3b000000, 0x4d000000, 0xae000000, 0x2a000000, 0xf5000000, 0xb0000000, 0xc8000000, 0xeb000000, 0xbb000000, 0x3c000000, 0x83000000, 0x53000000, 0x99000000, 0x61000000, 0x17000000, 0x2b000000, 0x04000000, 0x7e000000, 0xba000000, 0x77000000, 0xd6000000, 0x26000000, 0xe1000000, 0x69000000, 0x14000000, 0x63000000, 0x55000000, 0x21000000, 0x0c000000, 0x7d000000, } }; EXPORT_SYMBOL_GPL(crypto_ft_tab); EXPORT_SYMBOL_GPL(crypto_it_tab); /** * crypto_aes_set_key - Set the AES key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * * This function uses aes_expand_key() to expand the key. &crypto_aes_ctx * _must_ be the private data embedded in @tfm which is retrieved with * crypto_tfm_ctx(). * * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths) */ int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); return aes_expandkey(ctx, in_key, key_len); } EXPORT_SYMBOL_GPL(crypto_aes_set_key); /* encrypt a block of text */ #define f_rn(bo, bi, n, k) do { \ bo[n] = crypto_ft_tab[0][byte(bi[n], 0)] ^ \ crypto_ft_tab[1][byte(bi[(n + 1) & 3], 1)] ^ \ crypto_ft_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_ft_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n); \ } while (0) #define f_nround(bo, bi, k) do {\ f_rn(bo, bi, 0, k); \ f_rn(bo, bi, 1, k); \ f_rn(bo, bi, 2, k); \ f_rn(bo, bi, 3, k); \ k += 4; \ } while (0) #define f_rl(bo, bi, n, k) do { \ bo[n] = crypto_fl_tab[0][byte(bi[n], 0)] ^ \ crypto_fl_tab[1][byte(bi[(n + 1) & 3], 1)] ^ \ crypto_fl_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_fl_tab[3][byte(bi[(n + 3) & 3], 3)] ^ *(k + n); \ } while (0) #define f_lround(bo, bi, k) do {\ f_rl(bo, bi, 0, k); \ f_rl(bo, bi, 1, k); \ f_rl(bo, bi, 2, k); \ f_rl(bo, bi, 3, k); \ } while (0) static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); u32 b0[4], b1[4]; const u32 *kp = ctx->key_enc + 4; const int key_len = ctx->key_length; b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in); b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4); b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8); b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { f_nround(b1, b0, kp); f_nround(b0, b1, kp); } if (key_len > 16) { f_nround(b1, b0, kp); f_nround(b0, b1, kp); } f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_nround(b0, b1, kp); f_nround(b1, b0, kp); f_lround(b0, b1, kp); put_unaligned_le32(b0[0], out); put_unaligned_le32(b0[1], out + 4); put_unaligned_le32(b0[2], out + 8); put_unaligned_le32(b0[3], out + 12); } /* decrypt a block of text */ #define i_rn(bo, bi, n, k) do { \ bo[n] = crypto_it_tab[0][byte(bi[n], 0)] ^ \ crypto_it_tab[1][byte(bi[(n + 3) & 3], 1)] ^ \ crypto_it_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_it_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n); \ } while (0) #define i_nround(bo, bi, k) do {\ i_rn(bo, bi, 0, k); \ i_rn(bo, bi, 1, k); \ i_rn(bo, bi, 2, k); \ i_rn(bo, bi, 3, k); \ k += 4; \ } while (0) #define i_rl(bo, bi, n, k) do { \ bo[n] = crypto_il_tab[0][byte(bi[n], 0)] ^ \ crypto_il_tab[1][byte(bi[(n + 3) & 3], 1)] ^ \ crypto_il_tab[2][byte(bi[(n + 2) & 3], 2)] ^ \ crypto_il_tab[3][byte(bi[(n + 1) & 3], 3)] ^ *(k + n); \ } while (0) #define i_lround(bo, bi, k) do {\ i_rl(bo, bi, 0, k); \ i_rl(bo, bi, 1, k); \ i_rl(bo, bi, 2, k); \ i_rl(bo, bi, 3, k); \ } while (0) static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); u32 b0[4], b1[4]; const int key_len = ctx->key_length; const u32 *kp = ctx->key_dec + 4; b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in); b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4); b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8); b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12); if (key_len > 24) { i_nround(b1, b0, kp); i_nround(b0, b1, kp); } if (key_len > 16) { i_nround(b1, b0, kp); i_nround(b0, b1, kp); } i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_nround(b0, b1, kp); i_nround(b1, b0, kp); i_lround(b0, b1, kp); put_unaligned_le32(b0[0], out); put_unaligned_le32(b0[1], out + 4); put_unaligned_le32(b0[2], out + 8); put_unaligned_le32(b0[3], out + 12); } static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = crypto_aes_set_key, .cia_encrypt = crypto_aes_encrypt, .cia_decrypt = crypto_aes_decrypt } } }; static int __init aes_init(void) { return crypto_register_alg(&aes_alg); } static void __exit aes_fini(void) { crypto_unregister_alg(&aes_alg); } subsys_initcall(aes_init); module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_CRYPTO("aes"); MODULE_ALIAS_CRYPTO("aes-generic");
153 153 153 127 127 125 5 73 15 4 38 36 2 545 546 542 542 63 1 52 49 540 543 546 34 8 20 6 28 4 6 13 12 8 122 121 121 121 36 34 122 45 1 49 49 47 46 40 1 2 1 1 37 37 33 2 37 4 17 8 8 8 9 14 2 33 10 112 8 73 59 34 221 1 1 1 129 11 11 83 154 25 53 34 171 171 167 5 171 171 2 32 2 3 197 197 96 63 32 98 10 198 197 40 157 181 172 40 127 12 2 11 131 171 34 215 7 5 12 23 24 19 2 16 1 17 17 36 1 4 6 2 26 2 24 7 19 11 14 19 7 32 23 9 1 3 1 1 1 1 1 320 316 3 1 1 6 68 59 8 18 1 2 1 14 2 10 10 7 2 6 7 6 10 10 10 10 7 10 5 5 3 80 80 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * RAW - implementation of IP "raw" sockets. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: * Alan Cox : verify_area() fixed up * Alan Cox : ICMP error handling * Alan Cox : EMSGSIZE if you send too big a packet * Alan Cox : Now uses generic datagrams and shared * skbuff library. No more peek crashes, * no more backlogs * Alan Cox : Checks sk->broadcast. * Alan Cox : Uses skb_free_datagram/skb_copy_datagram * Alan Cox : Raw passes ip options too * Alan Cox : Setsocketopt added * Alan Cox : Fixed error return for broadcasts * Alan Cox : Removed wake_up calls * Alan Cox : Use ttl/tos * Alan Cox : Cleaned up old debugging * Alan Cox : Use new kernel side addresses * Arnt Gulbrandsen : Fixed MSG_DONTROUTE in raw sockets. * Alan Cox : BSD style RAW socket demultiplexing. * Alan Cox : Beginnings of mrouted support. * Alan Cox : Added IP_HDRINCL option. * Alan Cox : Skip broadcast check if BSDism set. * David S. Miller : New socket lookup architecture. */ #include <linux/types.h> #include <linux/atomic.h> #include <asm/byteorder.h> #include <asm/current.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <linux/stddef.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/spinlock.h> #include <linux/sockios.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/mroute.h> #include <linux/netdevice.h> #include <linux/in_route.h> #include <linux/route.h> #include <linux/skbuff.h> #include <linux/igmp.h> #include <net/net_namespace.h> #include <net/dst.h> #include <net/sock.h> #include <linux/ip.h> #include <linux/net.h> #include <net/ip.h> #include <net/icmp.h> #include <net/udp.h> #include <net/raw.h> #include <net/snmp.h> #include <net/tcp_states.h> #include <net/inet_common.h> #include <net/checksum.h> #include <net/xfrm.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/uio.h> struct raw_frag_vec { struct msghdr *msg; union { struct icmphdr icmph; char c[1]; } hdr; int hlen; }; struct raw_hashinfo raw_v4_hashinfo; EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *hlist; hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; spin_lock(&h->lock); sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } EXPORT_SYMBOL_GPL(raw_hash_sk); void raw_unhash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; spin_lock(&h->lock); if (sk_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_unhash_sk); bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif) { const struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return true; return false; } EXPORT_SYMBOL_GPL(raw_v4_match); /* * 0 - deliver * 1 - block */ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { struct icmphdr _hdr; const struct icmphdr *hdr; hdr = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_hdr), &_hdr); if (!hdr) return 1; if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ return 0; } /* IP input processing comes here for RAW socket delivery. * Caller owns SKB, so we must make clones. * * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ static int raw_v4_input(struct net *net, struct sk_buff *skb, const struct iphdr *iph, int hash) { int sdif = inet_sdif(skb); struct hlist_head *hlist; int dif = inet_iif(skb); int delivered = 0; struct sock *sk; hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); continue; } delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, skb->dev->ifindex, sdif)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) raw_rcv(sk, clone); } } rcu_read_unlock(); return delivered; } int raw_local_deliver(struct sk_buff *skb, int protocol) { struct net *net = dev_net(skb->dev); return raw_v4_input(net, skb, ip_hdr(skb), raw_hashfunc(net, protocol)); } static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) { struct inet_sock *inet = inet_sk(sk); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; int harderr = 0; bool recverr; int err = 0; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); else if (type == ICMP_REDIRECT) { ipv4_sk_redirect(skb, sk); return; } /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication is useless without ip_recverr and error is hard. */ recverr = inet_test_bit(RECVERR, sk); if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; switch (type) { default: case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; case ICMP_SOURCE_QUENCH: return; case ICMP_PARAMETERPROB: err = EPROTO; harderr = 1; break; case ICMP_DEST_UNREACH: err = EHOSTUNREACH; if (code > NR_ICMP_UNREACH) break; if (code == ICMP_FRAG_NEEDED) { harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT; err = EMSGSIZE; } else { err = icmp_err_convert[code].errno; harderr = icmp_err_convert[code].fatal; } } if (recverr) { const struct iphdr *iph = (const struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); if (inet_test_bit(HDRINCL, sk)) payload = skb->data; ip_icmp_error(sk, skb, err, 0, info, payload); } if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } } void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); struct hlist_head *hlist; const struct iphdr *iph; struct sock *sk; int hash; hash = raw_hashfunc(net, protocol); hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { iph = (const struct iphdr *)skb->data; if (!raw_v4_match(net, sk, iph->protocol, iph->daddr, iph->saddr, dif, sdif)) continue; raw_err(sk, skb, info); } rcu_read_unlock(); } static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; /* Charge it to the socket. */ ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } return NET_RX_SUCCESS; } int raw_rcv(struct sock *sk, struct sk_buff *skb) { if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); skb_push(skb, -skb_network_offset(skb)); raw_rcv_skb(sk, skb); return 0; } static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, struct rtable **rtp, unsigned int flags, const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; int err; struct rtable *rt = *rtp; int hlen, tlen; if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, rt->dst.dev->mtu); return -EMSGSIZE; } if (length < sizeof(struct iphdr)) return -EINVAL; if (flags&MSG_PROBE) goto out; hlen = LL_RESERVED_SPACE(rt->dst.dev); tlen = rt->dst.dev->needed_tailroom; skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (!skb) goto error; skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); skb_put(skb, length); skb->ip_summed = CHECKSUM_NONE; skb_setup_tx_timestamp(skb, sockc->tsflags); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; /* * We don't want to modify the ip header, but we do need to * be sure that it won't cause problems later along the network * stack. Specifically we want to make sure that iph->ihl is a * sane value. If ihl points beyond the length of the buffer passed * in, reject the frame as invalid */ err = -EINVAL; if (iphlen > length) goto error_free; if (iphlen >= sizeof(*iph)) { if (!iph->saddr) iph->saddr = fl4->saddr; iph->check = 0; iph->tot_len = htons(length); if (!iph->id) ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); skb->transport_header += iphlen; if (iph->protocol == IPPROTO_ICMP && length >= iphlen + sizeof(struct icmphdr)) icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); } err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) goto error; out: return 0; error_free: kfree_skb(skb); error: IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) err = 0; return err; } static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4) { int err; if (fl4->flowi4_proto != IPPROTO_ICMP) return 0; /* We only need the first two bytes. */ rfv->hlen = 2; err = memcpy_from_msg(rfv->hdr.c, rfv->msg, rfv->hlen); if (err) return err; fl4->fl4_icmp_type = rfv->hdr.icmph.type; fl4->fl4_icmp_code = rfv->hdr.icmph.code; return 0; } static int raw_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct raw_frag_vec *rfv = from; if (offset < rfv->hlen) { int copy = min(rfv->hlen - offset, len); if (skb->ip_summed == CHECKSUM_PARTIAL) memcpy(to, rfv->hdr.c + offset, copy); else skb->csum = csum_block_add( skb->csum, csum_partial_copy_nocheck(rfv->hdr.c + offset, to, copy), odd); odd = 0; offset += copy; to += copy; len -= copy; if (!len) return 0; } offset -= rfv->hlen; return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; u8 tos, scope; int free = 0; __be32 daddr; __be32 saddr; int uc_index, err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; hdrincl = inet_test_bit(HDRINCL, sk); /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", __func__, current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipcm_init_sk(&ipc, inet); /* Keep backward compat */ if (hdrincl) ipc.protocol = IPPROTO_RAW; if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); goto out; } if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) goto done; daddr = ipc.opt->opt.faddr; } } tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) { ipc.oif = uc_index; } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast * and uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. */ if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), uc_index)) { ipc.oif = uc_index; } } flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); fl4.fl4_icmp_type = 0; fl4.fl4_icmp_code = 0; if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; err = raw_probe_proto_opt(&rfv, &fl4); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); else { if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); err = ip_append_data(sk, &fl4, raw_getfrag, &rfv, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk, &fl4); if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) err = 0; } release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: if (err < 0) return err; return len; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } static void raw_close(struct sock *sk, long timeout) { /* * Raw sockets may have direct kernel references. Kill them. */ ip_ra_control(sk, 0, NULL); sk_common_release(sk); } static void raw_destroy(struct sock *sk) { lock_sock(sk); ip_flush_pending_frames(sk); release_sock(sk); } /* This gets rid of all the nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; struct net *net = sock_net(sk); u32 tb_id = RT_TABLE_LOCAL; int ret = -EINVAL; int chk_addr_ret; lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; if (sk->sk_bound_dev_if) tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); ret = -EADDRNOTAVAIL; if (!inet_addr_valid_or_nonlocal(net, inet, addr->sin_addr.s_addr, chk_addr_ret)) goto out; inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; out: release_sock(sk); return ret; } /* * This should be easy, if there is something there * we return it, otherwise we block. */ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; int err = -EOPNOTSUPP; DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct sk_buff *skb; if (flags & MSG_OOB) goto out; if (flags & MSG_ERRQUEUE) { err = ip_recv_error(sk, msg, len, addr_len); goto out; } skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto done; sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); if (flags & MSG_TRUNC) copied = skb->len; done: skb_free_datagram(sk, skb); out: if (err) return err; return copied; } static int raw_sk_init(struct sock *sk) { struct raw_sock *rp = raw_sk(sk); if (inet_sk(sk)->inet_num == IPPROTO_ICMP) memset(&rp->filter, 0, sizeof(rp->filter)); return 0; } static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen) { if (optlen > sizeof(struct icmp_filter)) optlen = sizeof(struct icmp_filter); if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; } static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen) { int len, ret = -EFAULT; if (get_user(len, optlen)) goto out; ret = -EINVAL; if (len < 0) goto out; if (len > sizeof(struct icmp_filter)) len = sizeof(struct icmp_filter); ret = -EFAULT; if (put_user(len, optlen) || copy_to_user(optval, &raw_sk(sk)->filter, len)) goto out; ret = 0; out: return ret; } static int do_raw_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { if (optname == ICMP_FILTER) { if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_seticmpfilter(sk, optval, optlen); } return -ENOPROTOOPT; } static int raw_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { if (level != SOL_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); return do_raw_setsockopt(sk, optname, optval, optlen); } static int do_raw_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { if (optname == ICMP_FILTER) { if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_geticmpfilter(sk, optval, optlen); } return -ENOPROTOOPT; } static int raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (level != SOL_RAW) return ip_getsockopt(sk, level, optname, optval, optlen); return do_raw_getsockopt(sk, optname, optval, optlen); } static int raw_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); return 0; } case SIOCINQ: { struct sk_buff *skb; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) *karg = skb->len; else *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; } default: #ifdef CONFIG_IP_MROUTE return ipmr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif } } #ifdef CONFIG_COMPAT static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: case SIOCINQ: return -ENOIOCTLCMD; default: #ifdef CONFIG_IP_MROUTE return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); #else return -ENOIOCTLCMD; #endif } } #endif int raw_abort(struct sock *sk, int err) { lock_sock(sk); sk->sk_err = err; sk_error_report(sk); __udp_disconnect(sk, 0); release_sock(sk); return 0; } EXPORT_SYMBOL_GPL(raw_abort); struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = __udp_disconnect, .ioctl = raw_ioctl, .init = raw_sk_init, .setsockopt = raw_setsockopt, .getsockopt = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .bind = raw_bind, .backlog_rcv = raw_rcv_skb, .release_cb = ip4_datagram_release_cb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), .useroffset = offsetof(struct raw_sock, filter), .usersize = sizeof_field(struct raw_sock, filter), .h.raw_hash = &raw_v4_hashinfo, #ifdef CONFIG_COMPAT .compat_ioctl = compat_raw_ioctl, #endif .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS static struct sock *raw_get_first(struct seq_file *seq, int bucket) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); struct hlist_head *hlist; struct sock *sk; for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { hlist = &h->ht[state->bucket]; sk_for_each(sk, hlist) { if (sock_net(sk) == seq_file_net(seq)) return sk; } } return NULL; } static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { struct raw_iter_state *state = raw_seq_private(seq); do { sk = sk_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk) return raw_get_first(seq, state->bucket + 1); return sk; } static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) { struct sock *sk = raw_get_first(seq, 0); if (sk) while (pos && (sk = raw_get_next(seq, sk)) != NULL) --pos; return pos ? NULL : sk; } void *raw_seq_start(struct seq_file *seq, loff_t *pos) __acquires(&h->lock) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); spin_lock(&h->lock); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; if (v == SEQ_START_TOKEN) sk = raw_get_first(seq, 0); else sk = raw_get_next(seq, v); ++*pos; return sk; } EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) __releases(&h->lock) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr, src = inet->inet_rcv_saddr; __u16 destp = 0, srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops\n"); else raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; } static const struct seq_operations raw_seq_ops = { .start = raw_seq_start, .next = raw_seq_next, .stop = raw_seq_stop, .show = raw_seq_show, }; static __net_init int raw_init_net(struct net *net) { if (!proc_create_net_data("raw", 0444, net->proc_net, &raw_seq_ops, sizeof(struct raw_iter_state), &raw_v4_hashinfo)) return -ENOMEM; return 0; } static __net_exit void raw_exit_net(struct net *net) { remove_proc_entry("raw", net->proc_net); } static __net_initdata struct pernet_operations raw_net_ops = { .init = raw_init_net, .exit = raw_exit_net, }; int __init raw_proc_init(void) { return register_pernet_subsys(&raw_net_ops); } void __init raw_proc_exit(void) { unregister_pernet_subsys(&raw_net_ops); } #endif /* CONFIG_PROC_FS */ static void raw_sysctl_init_net(struct net *net) { #ifdef CONFIG_NET_L3_MASTER_DEV net->ipv4.sysctl_raw_l3mdev_accept = 1; #endif } static int __net_init raw_sysctl_init(struct net *net) { raw_sysctl_init_net(net); return 0; } static struct pernet_operations __net_initdata raw_sysctl_ops = { .init = raw_sysctl_init, }; void __init raw_init(void) { raw_sysctl_init_net(&init_net); if (register_pernet_subsys(&raw_sysctl_ops)) panic("RAW: failed to init sysctl parameters.\n"); }
10 91 17 5 5 5 5 2 1 6 80 80 6 2 4 81 81 2 1 81 17 18 18 18 1 7 27 13 13 1 1 11 4 4 2 1 1 27 19 8 1 7 7 2 7 5 4 2 7 4 7 1 6 3 5 7 19 10 4 7 1 7 6 30 30 1 2 13 11 8 7 2 4 6 23 23 2 23 13 11 23 23 12 12 12 11 1 1 1 24 5 19 19 1 1 1 1 19 7 14 19 19 19 19 19 19 13 1 12 7 2 5 3 9 1 3 4 4 9 3 3 3 3 3 79 3 3 69 53 4 7 1 1 2 1 30 2 4 1 6 14 3 19 1 10 1 1 2 1 1 1 2 2 1 1 18 12 3 3 6 2 1 1 1 1 1 1 2 14 3 9 2 2 2 2 2 2 2 1839 1838 304 305 38 1 2 1 5 5 5 5 2 2 1 1 6 6 5 5 4 20 20 19 19 6 24 23 6 2 11 4 9 5 1 6 5 5 30 30 10 8 8 4 4 7 7 7 1 27 8 20 6 5 1 1 15 6 5 1 1 26 2 1 22 5 2 3 5 6 3 3 12 2 31 31 14 9 1 1 5 1 4 10 3 8 4 2 2 3 3 3 3 3 3 3 5 5 5 5 2 80 80 80 80 80 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 // SPDX-License-Identifier: GPL-2.0-or-later /* * IP multicast routing support for mrouted 3.6/3.8 * * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> * Linux Consultancy and Custom Driver Development * * Fixes: * Michael Chastain : Incorrect size of copying. * Alan Cox : Added the cache manager code * Alan Cox : Fixed the clone/copy bug and device race. * Mike McLagan : Routing by source * Malcolm Beattie : Buffer handling fixes. * Alexey Kuznetsov : Double buffer free and other fixes. * SVR Anand : Fixed several multicast bugs and problems. * Alexey Kuznetsov : Status, optimisations and more. * Brad Parker : Better behaviour on mrouted upcall * overflow. * Carlos Picoto : PIMv1 Support * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header * Relax this requirement to work with older peers. */ #include <linux/uaccess.h> #include <linux/types.h> #include <linux/cache.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mroute.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/route.h> #include <net/icmp.h> #include <net/udp.h> #include <net/raw.h> #include <linux/notifier.h> #include <linux/if_arp.h> #include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/export.h> #include <linux/rhashtable.h> #include <net/ip_tunnels.h> #include <net/checksum.h> #include <net/netlink.h> #include <net/fib_rules.h> #include <linux/netconf.h> #include <net/rtnh.h> #include <linux/nospec.h> struct ipmr_rule { struct fib_rule common; }; struct ipmr_result { struct mr_table *mrt; }; /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ static DEFINE_SPINLOCK(mrt_lock); static struct net_device *vif_dev_read(const struct vif_device *vif) { return rcu_dereference(vif->dev); } /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); /* We return to original Alan's scheme. Hash table of resolved * entries is changed only in process context and protected * with weak lock mrt_lock. Queue of unresolved entries is protected * with strong spinlock mfc_unres_lock. * * In this case data path is free of exclusive locks at all. */ static struct kmem_cache *mrt_cachep __ro_after_init; static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES #define ipmr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ lockdep_rtnl_is_held() || \ list_empty(&net->ipv4.mr_tables)) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { struct mr_table *ret; if (!mrt) ret = list_entry_rcu(net->ipv4.mr_tables.next, struct mr_table, list); else ret = list_entry_rcu(mrt->list.next, struct mr_table, list); if (&ret->list == &net->ipv4.mr_tables) return NULL; return ret; } static struct mr_table *ipmr_get_table(struct net *net, u32 id) { struct mr_table *mrt; ipmr_for_each_table(mrt, net) { if (mrt->id == id) return mrt; } return NULL; } static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { int err; struct ipmr_result res; struct fib_lookup_arg arg = { .result = &res, .flags = FIB_LOOKUP_NOREF, }; /* update flow if oif or iif point to device enslaved to l3mdev */ l3mdev_update_flow(net, flowi4_to_flowi(flp4)); err = fib_rules_lookup(net->ipv4.mr_rules_ops, flowi4_to_flowi(flp4), 0, &arg); if (err < 0) return err; *mrt = res.mrt; return 0; } static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct ipmr_result *res = arg->result; struct mr_table *mrt; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: return -ENETUNREACH; case FR_ACT_PROHIBIT: return -EACCES; case FR_ACT_BLACKHOLE: default: return -EINVAL; } arg->table = fib_rule_get_table(rule, arg); mrt = ipmr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; } static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { return 1; } static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, struct netlink_ext_ack *extack) { return 0; } static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { return 1; } static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh) { frh->dst_len = 0; frh->src_len = 0; frh->tos = 0; return 0; } static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), .action = ipmr_rule_action, .match = ipmr_rule_match, .configure = ipmr_rule_configure, .compare = ipmr_rule_compare, .fill = ipmr_rule_fill, .nlgroup = RTNLGRP_IPV4_RULE, .owner = THIS_MODULE, }; static int __net_init ipmr_rules_init(struct net *net) { struct fib_rules_ops *ops; struct mr_table *mrt; int err; ops = fib_rules_register(&ipmr_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); if (IS_ERR(mrt)) { err = PTR_ERR(mrt); goto err1; } err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); if (err < 0) goto err2; net->ipv4.mr_rules_ops = ops; return 0; err2: rtnl_lock(); ipmr_free_table(mrt); rtnl_unlock(); err1: fib_rules_unregister(ops); return err; } static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack); } static unsigned int ipmr_rules_seq_read(struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); } bool ipmr_rule_default(const struct fib_rule *rule) { return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; } EXPORT_SYMBOL(ipmr_rule_default); #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { if (!mrt) return net->ipv4.mrt; return NULL; } static struct mr_table *ipmr_get_table(struct net *net, u32 id) { return net->ipv4.mrt; } static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { *mrt = net->ipv4.mrt; return 0; } static int __net_init ipmr_rules_init(struct net *net) { struct mr_table *mrt; mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); if (IS_ERR(mrt)) return PTR_ERR(mrt); net->ipv4.mrt = mrt; return 0; } static void __net_exit ipmr_rules_exit(struct net *net) { ASSERT_RTNL(); ipmr_free_table(net->ipv4.mrt); net->ipv4.mrt = NULL; } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static unsigned int ipmr_rules_seq_read(struct net *net) { return 0; } bool ipmr_rule_default(const struct fib_rule *rule) { return true; } EXPORT_SYMBOL(ipmr_rule_default); #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct mfc_cache_cmp_arg *cmparg = arg->key; const struct mfc_cache *c = ptr; return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || cmparg->mfc_origin != c->mfc_origin; } static const struct rhashtable_params ipmr_rht_params = { .head_offset = offsetof(struct mr_mfc, mnode), .key_offset = offsetof(struct mfc_cache, cmparg), .key_len = sizeof(struct mfc_cache_cmp_arg), .nelem_hint = 3, .obj_cmpfn = ipmr_hash_cmp, .automatic_shrinking = true, }; static void ipmr_new_table_set(struct mr_table *mrt, struct net *net) { #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif } static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { .mfc_mcastgrp = htonl(INADDR_ANY), .mfc_origin = htonl(INADDR_ANY), }; static struct mr_table_ops ipmr_mr_table_ops = { .rht_params = &ipmr_rht_params, .cmparg_any = &ipmr_mr_table_ops_cmparg_any, }; static struct mr_table *ipmr_new_table(struct net *net, u32 id) { struct mr_table *mrt; /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (id != RT_TABLE_DEFAULT && id >= 1000000000) return ERR_PTR(-EINVAL); mrt = ipmr_get_table(net, id); if (mrt) return mrt; return mr_table_alloc(net, id, &ipmr_mr_table_ops, ipmr_expire_process, ipmr_new_table_set); } static void ipmr_free_table(struct mr_table *mrt) { timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ /* Initialize ipmr pimreg/tunnel in_device */ static bool ipmr_init_vif_indev(const struct net_device *dev) { struct in_device *in_dev; ASSERT_RTNL(); in_dev = __in_dev_get_rtnl(dev); if (!in_dev) return false; ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; return true; } static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *tunnel_dev, *new_dev; struct ip_tunnel_parm_kern p = { }; int err; tunnel_dev = __dev_get_by_name(net, "tunl0"); if (!tunnel_dev) goto out; p.iph.daddr = v->vifc_rmt_addr.s_addr; p.iph.saddr = v->vifc_lcl_addr.s_addr; p.iph.version = 4; p.iph.ihl = 5; p.iph.protocol = IPPROTO_IPIP; sprintf(p.name, "dvmrp%d", v->vifc_vifi); if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) goto out; err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, SIOCADDTUNNEL); if (err) goto out; new_dev = __dev_get_by_name(net, p.name); if (!new_dev) goto out; new_dev->flags |= IFF_MULTICAST; if (!ipmr_init_vif_indev(new_dev)) goto out_unregister; if (dev_open(new_dev, NULL)) goto out_unregister; dev_hold(new_dev); err = dev_set_allmulti(new_dev, 1); if (err) { dev_close(new_dev); tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, SIOCDELTUNNEL); dev_put(new_dev); new_dev = ERR_PTR(err); } return new_dev; out_unregister: unregister_netdevice(new_dev); out: return ERR_PTR(-ENOBUFS); } #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { kfree_skb(skb); return err; } DEV_STATS_ADD(dev, tx_bytes, skb->len); DEV_STATS_INC(dev, tx_packets); rcu_read_lock(); /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), IGMPMSG_WHOLEPKT); rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; } static int reg_vif_get_iflink(const struct net_device *dev) { return 0; } static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = &reg_vif_netdev_ops; dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) sprintf(name, "pimreg"); else sprintf(name, "pimreg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (!dev) return NULL; dev_net_set(dev, net); if (register_netdevice(dev)) { free_netdev(dev); return NULL; } if (!ipmr_init_vif_indev(dev)) goto failure; if (dev_open(dev, NULL)) goto failure; dev_hold(dev); return dev; failure: unregister_netdevice(dev); return NULL; } /* called with rcu_read_lock() */ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, unsigned int pimlen) { struct net_device *reg_dev = NULL; struct iphdr *encap; int vif_num; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: * a. packet is really sent to a multicast group * b. packet is not a NULL-REGISTER * c. packet is not truncated */ if (!ipv4_is_multicast(encap->daddr) || encap->tot_len == 0 || ntohs(encap->tot_len) + pimlen > skb->len) return 1; /* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */ vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (vif_num >= 0) reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); if (!reg_dev) return 1; skb->mac_header = skb->network_header; skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IP); skb->ip_summed = CHECKSUM_NONE; skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); netif_rx(skb); return NET_RX_SUCCESS; } #else static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { return NULL; } #endif static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, struct net_device *vif_dev, vifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, vif, vif_dev, vif_index, tb_id, &net->ipv4.ipmr_seq); } static int call_ipmr_mfc_entry_notifiers(struct net *net, enum fib_event_type event_type, struct mfc_cache *mfc, u32 tb_id) { return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type, &mfc->_c, tb_id, &net->ipv4.ipmr_seq); } /** * vif_delete - Delete a VIF entry * @mrt: Table to delete from * @vifi: VIF identifier to delete * @notify: Set to 1, if the caller is a notifier_call * @head: if unregistering the VIF, place it on this queue */ static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev; if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; v = &mrt->vif_table[vifi]; dev = rtnl_dereference(v->dev); if (!dev) return -EADDRNOTAVAIL; spin_lock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, vifi, mrt->id); RCU_INIT_POINTER(v->dev, NULL); if (vifi == mrt->mroute_reg_vif_num) { /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ WRITE_ONCE(mrt->mroute_reg_vif_num, -1); } if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { if (VIF_EXISTS(mrt, tmp)) break; } WRITE_ONCE(mrt->maxvif, tmp + 1); } spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); } if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); netdev_put(dev, &v->dev_tracker); return 0; } static void ipmr_cache_free_rcu(struct rcu_head *head) { struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); } static void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); } /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. */ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; struct nlmsgerr *e; atomic_dec(&mrt->cache_resolve_queue_len); while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); e = nlmsg_data(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } } ipmr_cache_free(c); } /* Timer process for the unresolved queue. */ static void ipmr_expire_process(struct timer_list *t) { struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); struct mr_mfc *c, *next; unsigned long expires; unsigned long now; if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); return; } if (list_empty(&mrt->mfc_unres_queue)) goto out; now = jiffies; expires = 10*HZ; list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { unsigned long interval = c->mfc_un.unres.expires - now; if (interval < expires) expires = interval; continue; } list_del(&c->list); mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); ipmr_destroy_unres(mrt, (struct mfc_cache *)c); } if (!list_empty(&mrt->mfc_unres_queue)) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); out: spin_unlock(&mfc_unres_lock); } /* Fill oifs list. It is called under locked mrt_lock. */ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { int vifi; cache->mfc_un.res.minvif = MAXVIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; if (cache->mfc_un.res.maxvif <= vifi) cache->mfc_un.res.maxvif = vifi + 1; } } cache->mfc_un.res.lastuse = jiffies; } static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { struct netdev_phys_item_id ppid = { }; int vifi = vifc->vifc_vifi; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? */ if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { case VIFF_REGISTER: if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ipmr_reg_vif(net, mrt); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); if (err) { unregister_netdevice(dev); dev_put(dev); return err; } break; case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (IS_ERR(dev)) return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); if (dev && !__in_dev_get_rtnl(dev)) { dev_put(dev); return -EADDRNOTAVAIL; } } else { dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); } if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); if (err) { dev_put(dev); return err; } break; default: return -EINVAL; } in_dev = __in_dev_get_rtnl(dev); if (!in_dev) { dev_put(dev); return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), (VIFF_TUNNEL | VIFF_REGISTER)); err = dev_get_port_parent_id(dev, &ppid, true); if (err == 0) { memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); v->dev_parent_id.id_len = ppid.id_len; } else { v->dev_parent_id.id_len = 0; } v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; /* And finish update writing critical data */ spin_lock(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) { /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); } if (vifi+1 > mrt->maxvif) WRITE_ONCE(mrt->maxvif, vifi + 1); spin_unlock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); return 0; } /* called with rcu_read_lock() */ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, __be32 mcastgrp) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = origin }; return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, __be32 mcastgrp, int vifi) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = htonl(INADDR_ANY) }; if (mcastgrp == htonl(INADDR_ANY)) return mr_mfc_find_any_parent(mrt, vifi); return mr_mfc_find_any(mrt, vifi, &arg); } /* Look for a (S,G,iif) entry if parent != -1 */ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, __be32 origin, __be32 mcastgrp, int parent) { struct mfc_cache_cmp_arg arg = { .mfc_mcastgrp = mcastgrp, .mfc_origin = origin, }; return mr_mfc_find_parent(mrt, &arg, parent); } /* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c) { c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->_c.mfc_un.res.minvif = MAXVIFS; c->_c.free = ipmr_cache_free_rcu; refcount_set(&c->_c.mfc_un.res.refcount, 1); } return c; } static struct mfc_cache *ipmr_cache_alloc_unres(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c) { skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; } return c; } /* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; /* Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); if (mr_fill_mroute(mrt, skb, &c->_c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); e = nlmsg_data(nlh); e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { rcu_read_lock(); ip_mr_forward(net, mrt, skb->dev, skb, c, 0); rcu_read_unlock(); } } } /* Bounce a cache query up to mrouted and netlink. * * Called under rcu_read_lock(). */ static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { const int ihl = ip_hdrlen(pkt); struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; struct sk_buff *skb; int ret; mroute_sk = rcu_dereference(mrt->mroute_sk); if (!mroute_sk) return -EINVAL; if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. * And all this only to mangle msg->im_msgtype and * to set msg->im_mbz to "mbz" :-) */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); skb_reset_transport_header(skb); msg = (struct igmpmsg *)skb_network_header(skb); memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = assert; msg->im_mbz = 0; if (assert == IGMPMSG_WRVIFWHOLE) { msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; } else { /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ int vif_num = READ_ONCE(mrt->mroute_reg_vif_num); msg->im_vif = vif_num; msg->im_vif_hi = vif_num >> 8; } ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); } else { /* Copy the IP header */ skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); /* Flag to the kernel this is a route add */ ip_hdr(skb)->protocol = 0; msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; igmp->code = 0; ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ skb->transport_header = skb->network_header; } igmpmsg_netlink_event(mrt, skb); /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); if (ret < 0) { net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); kfree_skb(skb); } return ret; } /* Queue a packet for resolution. It gets locked cache entry! */ /* Called under rcu_read_lock() */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); struct mfc_cache *c; bool found = false; int err; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { found = true; break; } } if (!found) { /* Create a new entry if allowable */ c = ipmr_cache_alloc_unres(); if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); return -ENOBUFS; } /* Fill in the new cache entry */ c->_c.mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ spin_unlock_bh(&mfc_unres_lock); ipmr_cache_free(c); kfree_skb(skb); return err; } atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->_c.list, &mrt->mfc_unres_queue); mroute_netlink_event(mrt, c, RTM_NEWROUTE); if (atomic_read(&mrt->cache_resolve_queue_len) == 1) mod_timer(&mrt->ipmr_expire_timer, c->_c.mfc_un.unres.expires); } /* See if we can append the packet */ if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { if (dev) { skb->dev = dev; skb->skb_iif = dev->ifindex; } skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } spin_unlock_bh(&mfc_unres_lock); return err; } /* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (!c) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); list_del_rcu(&c->_c.list); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); mr_cache_put(&c->_c); return 0; } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, struct mfcctl *mfc, int mrtsock, int parent) { struct mfc_cache *uc, *c; struct mr_mfc *_uc; bool found; int ret; if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (c) { spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; spin_unlock(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) && !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); if (!c) return -ENOMEM; c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, ipmr_rht_params); if (ret) { pr_err("ipmr: rhtable insert error %d\n", ret); ipmr_cache_free(c); return ret; } list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { uc = (struct mfc_cache *)_uc; if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } if (list_empty(&mrt->mfc_unres_queue)) del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (found) { ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct net *net = read_pnet(&mrt->net); struct mr_mfc *c, *tmp; struct mfc_cache *cache; LIST_HEAD(list); int i; /* Shut down all active vif entries */ if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { for (i = 0; i < mrt->maxvif; i++) { if (((mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS_STATIC)) || (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) continue; vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); } /* Wipe the cache */ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); cache = (struct mfc_cache *)c; call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, mrt->id); mroute_netlink_event(mrt, cache, RTM_DELROUTE); mr_cache_put(c); } } if (flags & MRT_FLUSH_MFC) { if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); cache = (struct mfc_cache *)c; mroute_netlink_event(mrt, cache, RTM_DELROUTE); ipmr_destroy_unres(mrt, cache); } spin_unlock_bh(&mfc_unres_lock); } } } /* called from ip_ra_control(), before an RCU grace period, * we don't need to call synchronize_rcu() here */ static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); struct mr_table *mrt; rtnl_lock(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); } } rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole * virtual interface system is a complete heap, but unfortunately * that's how BSD mrouted happens to think. Maybe one day with a proper * MOSPF/PIM router set up we can clean this up. */ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { struct net *net = sock_net(sk); int val, ret = 0, parent = 0; struct mr_table *mrt; struct vifctl vif; struct mfcctl mfc; bool do_wrvifwhole; u32 uval; /* There's one exception to the lock - MRT_DONE which needs to unlock */ rtnl_lock(); if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_IGMP) { ret = -EOPNOTSUPP; goto out_unlock; } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) { ret = -ENOENT; goto out_unlock; } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) { ret = -EACCES; goto out_unlock; } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) { ret = -EINVAL; break; } if (rtnl_dereference(mrt->mroute_sk)) { ret = -EADDRINUSE; break; } ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } break; case MRT_DONE: if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { /* We need to unlock here because mrtsock_destruct takes * care of rtnl itself and we can't change that due to * the IP_ROUTER_ALERT setsockopt which runs without it. */ rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); goto out; } break; case MRT_ADD_VIF: case MRT_DEL_VIF: if (optlen != sizeof(vif)) { ret = -EINVAL; break; } if (copy_from_sockptr(&vif, optval, sizeof(vif))) { ret = -EFAULT; break; } if (vif.vifc_vifi >= MAXVIFS) { ret = -ENFILE; break; } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } break; /* Manipulate the forwarding caches. These live * in a sort of kernel/user symbiosis. */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; fallthrough; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { ret = -EINVAL; break; } if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) { ret = -EFAULT; break; } if (parent == 0) parent = mfc.mfcc_parent; if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); break; case MRT_FLUSH: if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } mroute_clean_tables(mrt, val); break; /* Control PIM assert. */ case MRT_ASSERT: if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } mrt->mroute_do_assert = val; break; case MRT_PIM: if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } if (optlen != sizeof(val)) { ret = -EINVAL; break; } if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); val = !!val; if (val != mrt->mroute_do_pim) { mrt->mroute_do_pim = val; mrt->mroute_do_assert = val; mrt->mroute_do_wrvifwhole = do_wrvifwhole; } break; case MRT_TABLE: if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { ret = -ENOPROTOOPT; break; } if (optlen != sizeof(uval)) { ret = -EINVAL; break; } if (copy_from_sockptr(&uval, optval, sizeof(uval))) { ret = -EFAULT; break; } if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { mrt = ipmr_new_table(net, uval); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else raw_sk(sk)->ipmr_table = uval; } break; /* Spurious command, or MRT_VERSION which you cannot set. */ default: ret = -ENOPROTOOPT; } out_unlock: rtnl_unlock(); out: return ret; } /* Execute if this ioctl is a special mroute ioctl */ int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { switch (cmd) { /* These userspace buffers will be consumed by ipmr_ioctl() */ case SIOCGETVIFCNT: { struct sioc_vif_req buffer; return sock_ioctl_inout(sk, cmd, arg, &buffer, sizeof(buffer)); } case SIOCGETSGCNT: { struct sioc_sg_req buffer; return sock_ioctl_inout(sk, cmd, arg, &buffer, sizeof(buffer)); } } /* return code > 0 means that the ioctl was not executed */ return 1; } /* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, sockptr_t optlen) { int olr; int val; struct net *net = sock_net(sk); struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_IGMP) return -EOPNOTSUPP; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (optname) { case MRT_VERSION: val = 0x0305; break; case MRT_PIM: if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; case MRT_ASSERT: val = mrt->mroute_do_assert; break; default: return -ENOPROTOOPT; } if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; if (olr < 0) return -EINVAL; olr = min_t(unsigned int, olr, sizeof(int)); if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) return -EFAULT; return 0; } /* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void *arg) { struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct sioc_vif_req *vr; struct sioc_sg_req *sr; struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: vr = (struct sioc_vif_req *)arg; if (vr->vifi >= mrt->maxvif) return -EINVAL; vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr->vifi]; if (VIF_EXISTS(mrt, vr->vifi)) { vr->icount = READ_ONCE(vif->pkt_in); vr->ocount = READ_ONCE(vif->pkt_out); vr->ibytes = READ_ONCE(vif->bytes_in); vr->obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: sr = (struct sioc_sg_req *)arg; rcu_read_lock(); c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr); if (c) { sr->pktcnt = c->_c.mfc_un.res.pkt; sr->bytecnt = c->_c.mfc_un.res.bytes; sr->wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT struct compat_sioc_sg_req { struct in_addr src; struct in_addr grp; compat_ulong_t pktcnt; compat_ulong_t bytecnt; compat_ulong_t wrong_if; }; struct compat_sioc_vif_req { vifi_t vifi; /* Which iface */ compat_ulong_t icount; compat_ulong_t ocount; compat_ulong_t ibytes; compat_ulong_t obytes; }; int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req sr; struct compat_sioc_vif_req vr; struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct mr_table *mrt; mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; switch (cmd) { case SIOCGETVIFCNT: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { vr.icount = READ_ONCE(vif->pkt_in); vr.ocount = READ_ONCE(vif->pkt_out); vr.ibytes = READ_ONCE(vif->bytes_in); vr.obytes = READ_ONCE(vif->bytes_out); rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { sr.pktcnt = c->_c.mfc_un.res.pkt; sr.bytecnt = c->_c.mfc_un.res.bytes; sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; } } #endif static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; int ct; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; ipmr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (rcu_access_pointer(v->dev) == dev) vif_delete(mrt, ct, 1, NULL); } } return NOTIFY_DONE; } static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; /* Encapsulate a packet by attaching a valid IPIP header to it. * This avoids tunnel drivers and other mess and gives us the speed so * important for multicast video. */ static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); skb_push(skb, sizeof(struct iphdr)); skb->transport_header = skb->network_header; skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->tos = old_iph->tos; iph->ttl = old_iph->ttl; iph->frag_off = 0; iph->daddr = daddr; iph->saddr = saddr; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); nf_reset_ct(skb); } static inline int ipmr_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); return dst_output(net, sk, skb); } #ifdef CONFIG_NET_SWITCHDEV static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, int in_vifi, int out_vifi) { struct vif_device *out_vif = &mrt->vif_table[out_vifi]; struct vif_device *in_vif = &mrt->vif_table[in_vifi]; if (!skb->offload_l3_fwd_mark) return false; if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) return false; return netdev_phys_item_id_same(&out_vif->dev_parent_id, &in_vif->dev_parent_id); } #else static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, int in_vifi, int out_vifi) { return false; } #endif /* Processing handlers for ipmr_forward, under rcu_read_lock() */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, int in_vifi, struct sk_buff *skb, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; vif_dev = vif_dev_read(vif); if (!vif_dev) goto out_free; if (vif->flags & VIFF_REGISTER) { WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) goto out_free; if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, 0, 0, IPPROTO_IPIP, RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, 0, 0, IPPROTO_IPIP, RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; } dev = rt->dst.dev; if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not * allow to send ICMP, so that packets will disappear * to blackhole. */ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); goto out_free; } WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ DEV_STATS_INC(vif_dev, tx_packets); DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); } IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface * program is joined. * If we will not make it, the program will have to join on all * interfaces. On the other hand, multihoming host (or router, but * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, net, NULL, skb, skb->dev, dev, ipmr_forward_finish); return; out_free: kfree_skb(skb); } /* Called with mrt_lock or rcu_read_lock() */ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) { int ct; /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; } /* "local" means that we should preserve one skb (for local delivery) */ /* Called uner rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local) { int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; vif = c->_c.mfc_parent; c->_c.mfc_un.res.pkt++; c->_c.mfc_un.res.bytes += skb->len; c->_c.mfc_un.res.lastuse = jiffies; if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { struct mfc_cache *cache_proxy; /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; } /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... * * The best workaround until routing daemons will be * fixed is not to redistribute packet, if it was * send through wrong interface. It means, that * multicast applications WILL NOT work for * (S,G), which have default multicast route pointing * to wrong oif. In any case, it is not a good * idea to use multicasting applications on router. */ goto dont_forward; } c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, * so that we cannot check that packet arrived on an oif. * It is bad, but otherwise we would need to move pretty * large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); } goto dont_forward; } forward: WRITE_ONCE(mrt->vif_table[vif].pkt_in, mrt->vif_table[vif].pkt_in + 1); WRITE_ONCE(mrt->vif_table[vif].bytes_in, mrt->vif_table[vif].bytes_in + skb->len); /* Forward the frame */ if (c->mfc_origin == htonl(INADDR_ANY) && c->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && true_vifi != c->_c.mfc_parent && ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } for (ct = c->_c.mfc_un.res.maxvif - 1; ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ if ((c->mfc_origin != htonl(INADDR_ANY) || ct != true_vifi) && ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, psend); } psend = ct; } } last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, psend); } else { ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); return; } } dont_forward: if (!local) kfree_skb(skb); } static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; struct mr_table *mrt; int err; err = ipmr_fib_lookup(net, &fl4, &mrt); if (err) return ERR_PTR(err); return mrt; } /* Multicast packets for forwarding arrive here * Called with rcu_read_lock(); */ int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; struct net_device *dev; /* skb->dev passed in is the loX master dev for vrfs. * As there are no vifs associated with loopback devices, * get the proper interface that does have a vif associated with it. */ dev = skb->dev; if (netif_is_l3_master(skb->dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) { kfree_skb(skb); return -ENODEV; } } /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. */ if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) { kfree_skb(skb); return PTR_ERR(mrt); } if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) return 0; } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) { /* IGMPv1 (and broken IGMPv2 implementations sort of * Cisco IOS <= 11.2(8)) do not put router alert * option to IGMP packets destined to routable * groups. It is very bad, because it means * that we can forward NO IGMP messages. */ struct sock *mroute_sk; mroute_sk = rcu_dereference(mrt->mroute_sk); if (mroute_sk) { nf_reset_ct(skb); raw_rcv(mroute_sk, skb); return 0; } } } /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); if (!cache) { int vif = ipmr_find_vif(mrt, dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, vif); } /* No usable cache entry */ if (!cache) { int vif; if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); ip_local_deliver(skb); if (!skb2) return -ENOBUFS; skb = skb2; } vif = ipmr_find_vif(mrt, dev); if (vif >= 0) return ipmr_cache_unresolved(mrt, vif, skb, dev); kfree_skb(skb); return -ENODEV; } ip_mr_forward(net, mrt, dev, skb, cache, local); if (local) return ip_local_deliver(skb); return 0; dont_forward: if (local) return ip_local_deliver(skb); kfree_skb(skb); return 0; } #ifdef CONFIG_IP_PIMSM_V1 /* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; struct net *net = dev_net(skb->dev); struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = igmp_hdr(skb); mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } return 0; } #endif #ifdef CONFIG_IP_PIMSM_V2 static int pim_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct net *net = dev_net(skb->dev); struct mr_table *mrt; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || (pim->flags & PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); } return 0; } #endif int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; int err; mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); if (!mrt) return -ENOENT; rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); if (!cache && skb->dev) { int vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, daddr, vif); } if (!cache) { struct sk_buff *skb2; struct iphdr *iph; struct net_device *dev; int vif = -1; dev = skb->dev; if (dev) vif = ipmr_find_vif(mrt, dev); if (vif < 0) { rcu_read_unlock(); return -ENODEV; } skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { rcu_read_unlock(); return -ENOMEM; } NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); iph->ihl = sizeof(struct iphdr) >> 2; iph->saddr = saddr; iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2, dev); rcu_read_unlock(); return err; } err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); rcu_read_unlock(); return err; } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mfc_cache *c, int cmd, int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); rtm->rtm_family = RTNL_FAMILY_IPMR; rtm->rtm_dst_len = 32; rtm->rtm_src_len = 32; rtm->rtm_tos = 0; rtm->rtm_table = mrt->id; if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags) { return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, cmd, flags); } static size_t mroute_msgsize(bool unresolved, int maxvif) { size_t len = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_SRC */ + nla_total_size(4) /* RTA_DST */ ; if (!unresolved) len = len + nla_total_size(4) /* RTA_IIF */ + nla_total_size(0) /* RTA_MULTIPATH */ + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; return len; } static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC); return; errout: kfree_skb(skb); if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); } static size_t igmpmsg_netlink_msgsize(size_t payloadlen) { size_t len = NLMSG_ALIGN(sizeof(struct rtgenmsg)) + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ + nla_total_size(4) /* IPMRA_CREPORT_TABLE */ /* IPMRA_CREPORT_PKT */ + nla_total_size(payloadlen) ; return len; } static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; struct rtgenmsg *rtgenm; struct igmpmsg *msg; struct sk_buff *skb; struct nlattr *nla; int payloadlen; payloadlen = pkt->len - sizeof(struct igmpmsg); msg = (struct igmpmsg *)skb_network_header(pkt); skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); if (!skb) goto errout; nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, sizeof(struct rtgenmsg), 0); if (!nlh) goto errout; rtgenm = nlmsg_data(nlh); rtgenm->rtgen_family = RTNL_FAMILY_IPMR; if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) || nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, msg->im_src.s_addr) || nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, msg->im_dst.s_addr) || nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id)) goto nla_put_failure; nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), nla_data(nla), payloadlen)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_cancel(skb, nlh); errout: kfree_skb(skb); rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err) return err; if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); return -EINVAL; } for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; switch (i) { case RTA_SRC: case RTA_DST: case RTA_TABLE: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); return -EINVAL; } } return 0; } static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX + 1]; struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; __be32 src, grp; u32 tableid; int err; err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); if (!mrt) { err = -ENOENT; goto errout_free; } /* entries are added/deleted only under RTNL */ rcu_read_lock(); cache = ipmr_cache_find(mrt, src, grp); rcu_read_unlock(); if (!cache) { err = -ENOENT; goto errout_free; } skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); if (!skb) { err = -ENOBUFS; goto errout_free; } err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) goto errout_free; err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; errout_free: kfree_skb(skb); goto errout; } static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = { .rtnl_held = true, }; int err; if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, &filter, cb); if (err < 0) return err; } if (filter.table_id) { struct mr_table *mrt; mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) {